8224675: Late GC barrier insertion for ZGC
author	neliasso
Thu, 14 Feb 2019 14:54:05 +0100
changeset 55307 ed12027517c0
parent 55306 ea43db53de91
child 55308 871bc449ce06
child 57400 978ffc56771f
8224675: Late GC barrier insertion for ZGC
Reviewed-by: roland, eosterlund, pliden
src/hotspot/cpu/x86/gc/z/z_x86_64.ad
src/hotspot/share/adlc/formssel.cpp
src/hotspot/share/compiler/compilerDirectives.hpp
src/hotspot/share/gc/shared/c2/barrierSetC2.hpp
src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp
src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp
src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp
src/hotspot/share/gc/z/c2/zBarrierSetC2.hpp
src/hotspot/share/gc/z/zHeap.cpp
src/hotspot/share/gc/z/z_globals.hpp
src/hotspot/share/opto/classes.hpp
src/hotspot/share/opto/compile.cpp
src/hotspot/share/opto/compile.hpp
src/hotspot/share/opto/idealGraphPrinter.cpp
src/hotspot/share/opto/idealGraphPrinter.hpp
src/hotspot/share/opto/lcm.cpp
src/hotspot/share/opto/loopnode.cpp
src/hotspot/share/opto/loopnode.hpp
src/hotspot/share/opto/loopopts.cpp
src/hotspot/share/opto/memnode.cpp
src/hotspot/share/opto/memnode.hpp
src/hotspot/share/opto/node.cpp
src/hotspot/share/opto/node.hpp
src/hotspot/share/opto/phaseX.cpp
src/hotspot/share/opto/phasetype.hpp
src/hotspot/share/opto/vectornode.cpp
src/hotspot/share/runtime/stackValue.cpp
src/hotspot/share/utilities/growableArray.hpp
--- a/src/hotspot/cpu/x86/gc/z/z_x86_64.ad	Mon Jun 10 05:09:52 2019 +0200
+++ b/src/hotspot/cpu/x86/gc/z/z_x86_64.ad	Thu Feb 14 14:54:05 2019 +0100
@@ -21,6 +21,12 @@
 // questions.
 //
 
+source_hpp %{
+
+#include "gc/z/c2/zBarrierSetC2.hpp"
+
+%}
+
 source %{
 
 #include "gc/z/zBarrierSetAssembler.hpp"
@@ -45,7 +51,7 @@
                                       rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15) %{
 
   match(Set dst (LoadBarrierSlowReg src));
-  predicate(UseAVX <= 2);
+  predicate((UseAVX <= 2) && !n->as_LoadBarrierSlowReg()->is_weak());
 
   effect(DEF dst, KILL cr,
          KILL x0, KILL x1, KILL x2, KILL x3,
@@ -74,7 +80,7 @@
                                 rxmm28 x28, rxmm29 x29, rxmm30 x30, rxmm31 x31) %{
 
   match(Set dst (LoadBarrierSlowReg src));
-  predicate(UseAVX == 3);
+  predicate((UseAVX == 3) && !n->as_LoadBarrierSlowReg()->is_weak());
 
   effect(DEF dst, KILL cr,
          KILL x0, KILL x1, KILL x2, KILL x3,
@@ -102,8 +108,8 @@
                                           rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11,
                                           rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15) %{
 
-  match(Set dst (LoadBarrierWeakSlowReg src));
-  predicate(UseAVX <= 2);
+  match(Set dst (LoadBarrierSlowReg src));
+  predicate((UseAVX <= 2) && n->as_LoadBarrierSlowReg()->is_weak());
 
   effect(DEF dst, KILL cr,
          KILL x0, KILL x1, KILL x2, KILL x3,
@@ -131,8 +137,8 @@
                                     rxmm24 x24, rxmm25 x25, rxmm26 x26, rxmm27 x27,
                                     rxmm28 x28, rxmm29 x29, rxmm30 x30, rxmm31 x31) %{
 
-  match(Set dst (LoadBarrierWeakSlowReg src));
-  predicate(UseAVX == 3);
+  match(Set dst (LoadBarrierSlowReg src));
+  predicate((UseAVX == 3) && n->as_LoadBarrierSlowReg()->is_weak());
 
   effect(DEF dst, KILL cr,
          KILL x0, KILL x1, KILL x2, KILL x3,
@@ -152,3 +158,58 @@
 
   ins_pipe(pipe_slow);
 %}
+
+// Specialized versions of compareAndExchangeP that adds a keepalive that is consumed
+// but doesn't affect output.
+
+instruct z_compareAndExchangeP(
+        memory mem_ptr,
+        rax_RegP oldval, rRegP newval, rRegP keepalive,
+        rFlagsReg cr) %{
+    predicate(VM_Version::supports_cx8());
+    match(Set oldval (ZCompareAndExchangeP (Binary mem_ptr keepalive) (Binary oldval newval)));
+    effect(KILL cr);
+
+    format %{ "cmpxchgq $mem_ptr,$newval\t# "
+              "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
+    opcode(0x0F, 0xB1);
+    ins_encode(lock_prefix,
+            REX_reg_mem_wide(newval, mem_ptr),
+            OpcP, OpcS,
+            reg_mem(newval, mem_ptr)  // lock cmpxchg
+    );
+    ins_pipe( pipe_cmpxchg );
+%}
+
+instruct z_compareAndSwapP(rRegI res,
+                         memory mem_ptr,
+                         rax_RegP oldval, rRegP newval, rRegP keepalive,
+                         rFlagsReg cr) %{
+  predicate(VM_Version::supports_cx8());
+  match(Set res (ZCompareAndSwapP (Binary mem_ptr keepalive) (Binary oldval newval)));
+  match(Set res (ZWeakCompareAndSwapP (Binary mem_ptr keepalive) (Binary oldval newval)));
+  effect(KILL cr, KILL oldval);
+
+  format %{ "cmpxchgq $mem_ptr,$newval\t# "
+            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
+            "sete    $res\n\t"
+            "movzbl  $res, $res" %}
+  opcode(0x0F, 0xB1);
+  ins_encode(lock_prefix,
+          REX_reg_mem_wide(newval, mem_ptr),
+          OpcP, OpcS,
+          reg_mem(newval, mem_ptr),
+          REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
+          REX_reg_breg(res, res), // movzbl
+          Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
+  ins_pipe( pipe_cmpxchg );
+%}
+
+instruct z_xchgP( memory mem, rRegP newval, rRegP keepalive) %{
+  match(Set newval (ZGetAndSetP mem (Binary newval keepalive)));
+  format %{ "XCHGQ  $newval,[$mem]" %}
+  ins_encode %{
+    __ xchgq($newval$$Register, $mem$$Address);
+  %}
+  ins_pipe( pipe_cmpxchg );
+%}
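
The keepalive operand threaded through the three rules above exists only to extend the live range of the original oop across the atomic instruction: the match rule consumes it, but the generated code never reads it. A minimal standalone C++ sketch of the idea (hypothetical types, GCC/Clang inline asm, not HotSpot code):

#include <atomic>
#include <cstdio>

struct Object { int payload; };  // hypothetical stand-in for an oop

// CAS wrapper that consumes a keepalive value without affecting the result,
// mirroring how the rules above take an extra input that only pins a register.
static bool cas_with_keepalive(std::atomic<Object*>& field, Object*& expected,
                               Object* new_val, Object* keepalive) {
  asm volatile("" : : "r"(keepalive));  // forces keepalive to stay live here
  return field.compare_exchange_strong(expected, new_val);
}

int main() {
  Object a{1}, b{2};
  std::atomic<Object*> field{&a};
  Object* expected = &a;
  bool ok = cas_with_keepalive(field, expected, &b, /*keepalive=*/&a);
  std::printf("swapped=%d payload=%d\n", ok, field.load()->payload);
  return 0;
}

Expressing the keepalive as a real input lets the register allocator, rather than ad-hoc bookkeeping, guarantee the oop survives until the instruction retires.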
--- a/src/hotspot/share/adlc/formssel.cpp	Mon Jun 10 05:09:52 2019 +0200
+++ b/src/hotspot/share/adlc/formssel.cpp	Thu Feb 14 14:54:05 2019 +0100
@@ -757,7 +757,6 @@
   return NO_MEMORY_OPERAND;
 }
 
-
 // This instruction captures the machine-independent bottom_type
 // Expected use is for pointer vs oop determination for LoadP
 bool InstructForm::captures_bottom_type(FormDict &globals) const {
@@ -775,8 +774,9 @@
        !strcmp(_matrule->_rChild->_opType,"GetAndSetP")   ||
        !strcmp(_matrule->_rChild->_opType,"GetAndSetN")   ||
 #if INCLUDE_ZGC
+       !strcmp(_matrule->_rChild->_opType,"ZGetAndSetP") ||
+       !strcmp(_matrule->_rChild->_opType,"ZCompareAndExchangeP") ||
        !strcmp(_matrule->_rChild->_opType,"LoadBarrierSlowReg") ||
-       !strcmp(_matrule->_rChild->_opType,"LoadBarrierWeakSlowReg") ||
 #endif
 #if INCLUDE_SHENANDOAHGC
        !strcmp(_matrule->_rChild->_opType,"ShenandoahCompareAndExchangeP") ||
@@ -3506,12 +3506,16 @@
     "CompareAndSwapB", "CompareAndSwapS", "CompareAndSwapI", "CompareAndSwapL", "CompareAndSwapP", "CompareAndSwapN",
     "WeakCompareAndSwapB", "WeakCompareAndSwapS", "WeakCompareAndSwapI", "WeakCompareAndSwapL", "WeakCompareAndSwapP", "WeakCompareAndSwapN",
     "CompareAndExchangeB", "CompareAndExchangeS", "CompareAndExchangeI", "CompareAndExchangeL", "CompareAndExchangeP", "CompareAndExchangeN",
+#if INCLUDE_SHENANDOAHGC
     "ShenandoahCompareAndSwapN", "ShenandoahCompareAndSwapP", "ShenandoahWeakCompareAndSwapP", "ShenandoahWeakCompareAndSwapN", "ShenandoahCompareAndExchangeP", "ShenandoahCompareAndExchangeN",
+#endif
     "StoreCM",
-    "ClearArray",
     "GetAndSetB", "GetAndSetS", "GetAndAddI", "GetAndSetI", "GetAndSetP",
     "GetAndAddB", "GetAndAddS", "GetAndAddL", "GetAndSetL", "GetAndSetN",
-    "LoadBarrierSlowReg", "LoadBarrierWeakSlowReg"
+#if INCLUDE_ZGC
+    "LoadBarrierSlowReg", "ZGetAndSetP", "ZCompareAndSwapP", "ZCompareAndExchangeP", "ZWeakCompareAndSwapP",
+#endif
+    "ClearArray"
   };
   int cnt = sizeof(needs_ideal_memory_list)/sizeof(char*);
   if( strcmp(_opType,"PrefetchAllocation")==0 )
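
For context, ADLC decides whether a matched node carries a memory operand by a linear strcmp scan over needs_ideal_memory_list; the hunk above extends that table with the new Z opcodes and guards the GC-specific entries with the matching #if blocks. A reduced sketch of the lookup idiom (subset of names, illustrative only):

#include <cstdio>
#include <cstring>

// Small subset of the opcode-name table, for illustration.
static const char* needs_ideal_memory_list[] = {
  "LoadB", "StoreB", "CompareAndSwapP",
  "LoadBarrierSlowReg", "ZGetAndSetP", "ZCompareAndExchangeP"
};

static bool needs_ideal_memory(const char* op_type) {
  const int cnt = sizeof(needs_ideal_memory_list) / sizeof(char*);
  for (int i = 0; i < cnt; i++) {
    if (strcmp(op_type, needs_ideal_memory_list[i]) == 0) {
      return true;  // the instruction gets a memory input during matching
    }
  }
  return false;
}

int main() {
  std::printf("%d %d\n", needs_ideal_memory("ZGetAndSetP"), needs_ideal_memory("AddI"));
  return 0;
}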
--- a/src/hotspot/share/compiler/compilerDirectives.hpp	Mon Jun 10 05:09:52 2019 +0200
+++ b/src/hotspot/share/compiler/compilerDirectives.hpp	Thu Feb 14 14:54:05 2019 +0100
@@ -67,7 +67,7 @@
     cflags(CloneMapDebug,           bool, false, CloneMapDebug) \
     cflags(IGVPrintLevel,           intx, PrintIdealGraphLevel, IGVPrintLevel) \
     cflags(MaxNodeLimit,            intx, MaxNodeLimit, MaxNodeLimit) \
-ZGC_ONLY(cflags(ZOptimizeLoadBarriers, bool, ZOptimizeLoadBarriers, ZOptimizeLoadBarriers))
+ZGC_ONLY(cflags(ZTraceLoadBarriers, bool, false, ZTraceLoadBarriers))
 #else
   #define compilerdirectives_c2_flags(cflags)
 #endif
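
compilerdirectives_c2_flags is an X-macro table: each cflags row expands into a per-directive field with its default, which is why swapping ZOptimizeLoadBarriers for ZTraceLoadBarriers is a one-row change. A self-contained sketch of the expansion pattern (hypothetical two-row table, fourth column omitted):

#include <cstdio>

// X-macro table in the style of compilerdirectives_c2_flags.
#define C2_FLAGS(cflags)                  \
  cflags(ZTraceLoadBarriers, bool, false) \
  cflags(MaxNodeLimit,       int,  80000)

struct DirectiveSet {
  // One <Name>Option field per row, following the HotSpot naming convention.
#define declare_flag(name, type, dflt) type name##Option = dflt;
  C2_FLAGS(declare_flag)
#undef declare_flag
};

int main() {
  DirectiveSet set;
  set.ZTraceLoadBarriersOption = true;
  std::printf("trace=%d limit=%d\n", set.ZTraceLoadBarriersOption, set.MaxNodeLimitOption);
  return 0;
}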
--- a/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp	Mon Jun 10 05:09:52 2019 +0200
+++ b/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp	Thu Feb 14 14:54:05 2019 +0100
@@ -259,6 +259,7 @@
     Optimization,
     Expansion
   };
+
   virtual bool array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type, bool is_clone, ArrayCopyPhase phase) const { return false; }
   virtual void clone_barrier_at_expansion(ArrayCopyNode* ac, Node* call, PhaseIterGVN& igvn) const;
 
@@ -273,7 +274,6 @@
   virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const { }
   virtual void enqueue_useful_gc_barrier(PhaseIterGVN* igvn, Node* node) const {}
   virtual void eliminate_useless_gc_barriers(Unique_Node_List &useful, Compile* C) const {}
-  virtual void add_users_to_worklist(Unique_Node_List* worklist) const {}
 
   // Allow barrier sets to have shared state that is preserved across a compilation unit.
   // This could for example comprise macro nodes to be expanded during macro expansion.
@@ -286,17 +286,21 @@
   virtual bool is_gc_specific_loop_opts_pass(LoopOptsMode mode) const { return false; }
 
   virtual bool has_special_unique_user(const Node* node) const { return false; }
+  virtual bool needs_anti_dependence_check(const Node* node) const { return true; }
+
+  virtual void barrier_insertion_phase(Compile* C, PhaseIterGVN &igvn) const { }
 
   enum CompilePhase {
-    BeforeOptimize, /* post_parse = true */
-    BeforeExpand, /* post_parse = false */
+    BeforeOptimize,
+    BeforeLateInsertion,
+    BeforeMacroExpand,
     BeforeCodeGen
   };
-  virtual void verify_gc_barriers(Compile* compile, CompilePhase phase) const {}
 
   virtual bool flatten_gc_alias_type(const TypePtr*& adr_type) const { return false; }
 #ifdef ASSERT
   virtual bool verify_gc_alias_type(const TypePtr* adr_type, int offset) const { return false; }
+  virtual void verify_gc_barriers(Compile* compile, CompilePhase phase) const {}
 #endif
 
   virtual bool final_graph_reshaping(Compile* compile, Node* n, uint opcode) const { return false; }
@@ -310,8 +314,8 @@
   virtual bool matcher_find_shared_post_visit(Matcher* matcher, Node* n, uint opcode) const { return false; };
   virtual bool matcher_is_store_load_barrier(Node* x, uint xop) const { return false; }
 
-  virtual void igvn_add_users_to_worklist(PhaseIterGVN* igvn, Node* use) const {}
-  virtual void ccp_analyze(PhaseCCP* ccp, Unique_Node_List& worklist, Node* use) const {}
+  virtual void igvn_add_users_to_worklist(PhaseIterGVN* igvn, Node* use) const { }
+  virtual void ccp_analyze(PhaseCCP* ccp, Unique_Node_List& worklist, Node* use) const { }
 
   virtual Node* split_if_pre(PhaseIdealLoop* phase, Node* n) const { return NULL; }
   virtual bool build_loop_late_post(PhaseIdealLoop* phase, Node* n) const { return false; }
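
The new barrier_insertion_phase hook is the heart of the change: the shared interface keeps an empty default, and a GC overrides it to run its own pass between loop optimization and macro expansion, bracketed by the new BeforeLateInsertion and BeforeMacroExpand verification points. A minimal sketch of that dispatch shape (stand-in types, not the real C2 pipeline):

#include <cstdio>

struct Compile {};       // stand-ins for the C2 types
struct PhaseIterGVN {};

struct BarrierSetC2 {
  virtual ~BarrierSetC2() {}
  // No-op by default; a GC overrides this to inject its own pass.
  virtual void barrier_insertion_phase(Compile*, PhaseIterGVN&) const {}
};

struct ZBarrierSetC2 : BarrierSetC2 {
  void barrier_insertion_phase(Compile*, PhaseIterGVN&) const override {
    std::puts("Z: inserting load barriers after loop opts");
  }
};

static void optimize(const BarrierSetC2& bs, Compile* C, PhaseIterGVN& igvn) {
  // ... main loop optimizations run here (BeforeLateInsertion checkpoint) ...
  bs.barrier_insertion_phase(C, igvn);
  // ... macro expansion (BeforeMacroExpand) and code generation follow ...
}

int main() {
  ZBarrierSetC2 z;
  Compile C;
  PhaseIterGVN igvn;
  optimize(z, &C, igvn);
  return 0;
}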
--- a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp	Mon Jun 10 05:09:52 2019 +0200
+++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp	Thu Feb 14 14:54:05 2019 +0100
@@ -912,8 +912,6 @@
   }
 }
 
-void ShenandoahBarrierSetC2::add_users_to_worklist(Unique_Node_List* worklist) const {}
-
 void* ShenandoahBarrierSetC2::create_barrier_state(Arena* comp_arena) const {
   return new(comp_arena) ShenandoahBarrierSetC2State(comp_arena);
 }
@@ -928,7 +926,7 @@
 
 #ifdef ASSERT
 void ShenandoahBarrierSetC2::verify_gc_barriers(Compile* compile, CompilePhase phase) const {
-  if (ShenandoahVerifyOptoBarriers && phase == BarrierSetC2::BeforeExpand) {
+  if (ShenandoahVerifyOptoBarriers && phase == BarrierSetC2::BeforeMacroExpand) {
     ShenandoahBarrierC2Support::verify(Compile::current()->root());
   } else if (phase == BarrierSetC2::BeforeCodeGen) {
     // Verify G1 pre-barriers
--- a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp	Mon Jun 10 05:09:52 2019 +0200
+++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp	Thu Feb 14 14:54:05 2019 +0100
@@ -126,7 +126,6 @@
   virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const;
   virtual void enqueue_useful_gc_barrier(PhaseIterGVN* igvn, Node* node) const;
   virtual void eliminate_useless_gc_barriers(Unique_Node_List &useful, Compile* C) const;
-  virtual void add_users_to_worklist(Unique_Node_List* worklist) const;
 
   // Allow barrier sets to have shared state that is preserved across a compilation unit.
   // This could for example comprise macro nodes to be expanded during macro expansion.
--- a/src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp	Mon Jun 10 05:09:52 2019 +0200
+++ b/src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp	Thu Feb 14 14:54:05 2019 +0100
@@ -22,15 +22,22 @@
  */
 
 #include "precompiled.hpp"
+#include "opto/castnode.hpp"
 #include "opto/compile.hpp"
-#include "opto/castnode.hpp"
 #include "opto/escape.hpp"
 #include "opto/graphKit.hpp"
-#include "opto/idealKit.hpp"
 #include "opto/loopnode.hpp"
+#include "opto/machnode.hpp"
 #include "opto/macro.hpp"
+#include "opto/memnode.hpp"
+#include "opto/movenode.hpp"
 #include "opto/node.hpp"
+#include "opto/phase.hpp"
+#include "opto/phaseX.hpp"
+#include "opto/rootnode.hpp"
 #include "opto/type.hpp"
+#include "utilities/copy.hpp"
+#include "utilities/growableArray.hpp"
 #include "utilities/macros.hpp"
 #include "gc/z/zBarrierSet.hpp"
 #include "gc/z/c2/zBarrierSetC2.hpp"
@@ -84,7 +91,7 @@
   if (node->is_Phi()) {
     PhiNode* phi = node->as_Phi();
     Node* n = phi->in(1);
-    if (n != NULL && (n->is_LoadBarrierSlowReg() ||  n->is_LoadBarrierWeakSlowReg())) {
+    if (n != NULL && n->is_LoadBarrierSlowReg()) {
       return true;
     }
   }
@@ -121,50 +128,19 @@
   }
 }
 
-void ZBarrierSetC2::find_dominating_barriers(PhaseIterGVN& igvn) {
-  // Look for dominating barriers on the same address only once all
-  // other loop opts are over. Loop opts may cause a safepoint to be
-  // inserted between a barrier and its dominating barrier.
-  Compile* C = Compile::current();
-  ZBarrierSetC2* bs = (ZBarrierSetC2*)BarrierSet::barrier_set()->barrier_set_c2();
-  ZBarrierSetC2State* s = bs->state();
-  if (s->load_barrier_count() >= 2) {
-    Compile::TracePhase tp("idealLoop", &C->timers[Phase::_t_idealLoop]);
-    PhaseIdealLoop::optimize(igvn, LoopOptsLastRound);
-    if (C->major_progress()) C->print_method(PHASE_PHASEIDEALLOOP_ITERATIONS, 2);
-  }
-}
-
-void ZBarrierSetC2::add_users_to_worklist(Unique_Node_List* worklist) const {
-  // Permanent temporary workaround
-  // Loadbarriers may have non-obvious dead uses keeping them alive during parsing. The use is
-  // removed by RemoveUseless (after parsing, before optimize) but the barriers won't be added to
-  // the worklist. Unless we add them explicitly they are not guaranteed to end up there.
-  ZBarrierSetC2State* s = state();
+static bool load_require_barrier(LoadNode* load)      { return ((load->barrier_data() & RequireBarrier) == RequireBarrier); }
+static bool load_has_weak_barrier(LoadNode* load)     { return ((load->barrier_data() & WeakBarrier) == WeakBarrier); }
+static bool load_has_expanded_barrier(LoadNode* load) { return ((load->barrier_data() & ExpandedBarrier) == ExpandedBarrier); }
+static void load_set_expanded_barrier(LoadNode* load) { return load->set_barrier_data(ExpandedBarrier); }
 
-  for (int i = 0; i < s->load_barrier_count(); i++) {
-    LoadBarrierNode* n = s->load_barrier_node(i);
-    worklist->push(n);
+static void load_set_barrier(LoadNode* load, bool weak)    {
+  if (weak) {
+    load->set_barrier_data(WeakBarrier);
+  } else {
+    load->set_barrier_data(RequireBarrier);
   }
 }
 
-const TypeFunc* ZBarrierSetC2::load_barrier_Type() const {
-  const Type** fields;
-
-  // Create input types (domain)
-  fields = TypeTuple::fields(2);
-  fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL;
-  fields[TypeFunc::Parms+1] = TypeOopPtr::BOTTOM;
-  const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
-
-  // Create result type (range)
-  fields = TypeTuple::fields(1);
-  fields[TypeFunc::Parms+0] = TypeInstPtr::BOTTOM;
-  const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1, fields);
-
-  return TypeFunc::make(domain, range);
-}
-
 // == LoadBarrierNode ==
 
 LoadBarrierNode::LoadBarrierNode(Compile* C,
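
The helpers above capture the new representation: instead of distinct strong and weak barrier node types, the barrier kind rides along as flag bits on the LoadNode until the late insertion pass expands it. A standalone sketch of the flag scheme (bit values assumed for illustration, not taken from zBarrierSetC2.hpp):

#include <cstdint>
#include <cstdio>

// Assumed encoding: RequireBarrier is one bit, WeakBarrier includes it
// (weak implies a barrier is required), ExpandedBarrier is separate.
enum BarrierInfo : uint8_t {
  NoBarrier       = 0,
  RequireBarrier  = 1,
  WeakBarrier     = 3,   // inclusive with RequireBarrier
  ExpandedBarrier = 4
};

struct LoadNode {        // minimal stand-in for the C2 node
  uint8_t _barrier_data = NoBarrier;
  uint8_t barrier_data() const { return _barrier_data; }
  void set_barrier_data(uint8_t d) { _barrier_data = d; }
};

static bool load_require_barrier(LoadNode* l)  { return (l->barrier_data() & RequireBarrier) == RequireBarrier; }
static bool load_has_weak_barrier(LoadNode* l) { return (l->barrier_data() & WeakBarrier) == WeakBarrier; }

int main() {
  LoadNode strong, weak;
  strong.set_barrier_data(RequireBarrier);
  weak.set_barrier_data(WeakBarrier);
  std::printf("strong: require=%d weak=%d\n", load_require_barrier(&strong), load_has_weak_barrier(&strong));
  std::printf("weak:   require=%d weak=%d\n", load_require_barrier(&weak), load_has_weak_barrier(&weak));
  return 0;
}

Masking with WeakBarrier and comparing for equality is what separates weak from strong here, since the weak encoding includes the require bit.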
@@ -172,13 +148,9 @@
                                  Node* mem,
                                  Node* val,
                                  Node* adr,
-                                 bool weak,
-                                 bool writeback,
-                                 bool oop_reload_allowed) :
+                                 bool weak) :
     MultiNode(Number_of_Inputs),
-    _weak(weak),
-    _writeback(writeback),
-    _oop_reload_allowed(oop_reload_allowed) {
+    _weak(weak) {
   init_req(Control, c);
   init_req(Memory, mem);
   init_req(Oop, val);
@@ -217,8 +189,8 @@
   const Type** floadbarrier = (const Type **)(phase->C->type_arena()->Amalloc_4((Number_of_Outputs)*sizeof(Type*)));
   const Type* val_t = phase->type(in(Oop));
   floadbarrier[Control] = Type::CONTROL;
-  floadbarrier[Memory] = Type::MEMORY;
-  floadbarrier[Oop] = val_t;
+  floadbarrier[Memory]  = Type::MEMORY;
+  floadbarrier[Oop]     = val_t;
   return TypeTuple::make(Number_of_Outputs, floadbarrier);
 }
 
@@ -238,6 +210,11 @@
 }
 
 LoadBarrierNode* LoadBarrierNode::has_dominating_barrier(PhaseIdealLoop* phase, bool linear_only, bool look_for_similar) {
+  if (is_weak()) {
+    // Weak barriers can't be eliminated
+    return NULL;
+  }
+
   Node* val = in(LoadBarrierNode::Oop);
   if (in(Similar)->is_Proj() && in(Similar)->in(0)->is_LoadBarrier()) {
     LoadBarrierNode* lb = in(Similar)->in(0)->as_LoadBarrier();
@@ -266,7 +243,7 @@
     }
   }
 
-  if (ZVerifyLoadBarriers || can_be_eliminated()) {
+  if (can_be_eliminated()) {
     return NULL;
   }
 
@@ -316,7 +293,7 @@
         }
         if (ok) {
           assert(dom_found, "");
-          return u->as_LoadBarrier();;
+          return u->as_LoadBarrier();
         }
         break;
       }
@@ -328,6 +305,7 @@
 
 void LoadBarrierNode::push_dominated_barriers(PhaseIterGVN* igvn) const {
   // Change to that barrier may affect a dominated barrier so re-push those
+  assert(!is_weak(), "sanity");
   Node* val = in(LoadBarrierNode::Oop);
 
   for (DUIterator_Fast imax, i = val->fast_outs(imax); i < imax; i++) {
@@ -355,13 +333,9 @@
 }
 
 Node *LoadBarrierNode::Identity(PhaseGVN *phase) {
-  if (!phase->C->directive()->ZOptimizeLoadBarriersOption) {
-    return this;
-  }
-
-  bool redundant_addr = false;
   LoadBarrierNode* dominating_barrier = has_dominating_barrier(NULL, true, false);
   if (dominating_barrier != NULL) {
+    assert(!is_weak(), "Weak barriers cant be eliminated");
     assert(dominating_barrier->in(Oop) == in(Oop), "");
     return dominating_barrier;
   }
@@ -374,33 +348,33 @@
     return this;
   }
 
-  Node* val = in(Oop);
-  Node* mem = in(Memory);
-  Node* ctrl = in(Control);
-  Node* adr = in(Address);
+  Node *val = in(Oop);
+  Node *mem = in(Memory);
+  Node *ctrl = in(Control);
+
   assert(val->Opcode() != Op_LoadN, "");
+  assert(val->Opcode() != Op_DecodeN, "");
 
   if (mem->is_MergeMem()) {
-    Node* new_mem = mem->as_MergeMem()->memory_at(Compile::AliasIdxRaw);
+    Node *new_mem = mem->as_MergeMem()->memory_at(Compile::AliasIdxRaw);
     set_req(Memory, new_mem);
     if (mem->outcnt() == 0 && can_reshape) {
       phase->is_IterGVN()->_worklist.push(mem);
     }
-
     return this;
   }
 
-  bool optimizeLoadBarriers = phase->C->directive()->ZOptimizeLoadBarriersOption;
-  LoadBarrierNode* dominating_barrier = optimizeLoadBarriers ? has_dominating_barrier(NULL, !can_reshape, !phase->C->major_progress()) : NULL;
-  if (dominating_barrier != NULL && dominating_barrier->in(Oop) != in(Oop)) {
-    assert(in(Address) == dominating_barrier->in(Address), "");
-    set_req(Similar, dominating_barrier->proj_out(Oop));
-    return this;
+  LoadBarrierNode *dominating_barrier = NULL;
+  if (!is_weak()) {
+    dominating_barrier = has_dominating_barrier(NULL, !can_reshape, !phase->C->major_progress());
+    if (dominating_barrier != NULL && dominating_barrier->in(Oop) != in(Oop)) {
+      assert(in(Address) == dominating_barrier->in(Address), "");
+      set_req(Similar, dominating_barrier->proj_out(Oop));
+      return this;
+    }
   }
 
-  bool eliminate = (optimizeLoadBarriers && !(val->is_Phi() || val->Opcode() == Op_LoadP || val->Opcode() == Op_GetAndSetP || val->is_DecodeN())) ||
-                   (can_reshape && (dominating_barrier != NULL || !has_true_uses()));
-
+  bool eliminate = can_reshape && (dominating_barrier != NULL || !has_true_uses());
   if (eliminate) {
     if (can_reshape) {
       PhaseIterGVN* igvn = phase->is_IterGVN();
@@ -415,13 +389,13 @@
       fix_similar_in_uses(igvn);
       if (out_res != NULL) {
         if (dominating_barrier != NULL) {
+          assert(!is_weak(), "Sanity");
           igvn->replace_node(out_res, dominating_barrier->proj_out(Oop));
         } else {
           igvn->replace_node(out_res, val);
         }
       }
     }
-
     return new ConINode(TypeInt::ZERO);
   }
 
@@ -432,7 +406,7 @@
     return this;
   }
 
-  if (can_reshape) {
+  if (can_reshape && !is_weak()) {
     // If this barrier is linked through the Similar edge by a
     // dominated barrier and both barriers have the same Oop field,
     // the dominated barrier can go away, so push it for reprocessing.
@@ -446,10 +420,10 @@
       Node* u = out_res->fast_out(i);
       if (u->is_LoadBarrier() && u->in(Similar) == out_res &&
           (u->in(Oop) == val || !u->in(Similar)->is_top())) {
+        assert(!u->as_LoadBarrier()->is_weak(), "Sanity");
         igvn->_worklist.push(u);
       }
     }
-
     push_dominated_barriers(igvn);
   }
 
@@ -479,211 +453,17 @@
 
 bool LoadBarrierNode::has_true_uses() const {
   Node* out_res = proj_out_or_null(Oop);
-  if (out_res == NULL) {
-    return false;
-  }
-
-  for (DUIterator_Fast imax, i = out_res->fast_outs(imax); i < imax; i++) {
-    Node* u = out_res->fast_out(i);
-    if (!u->is_LoadBarrier() || u->in(Similar) != out_res) {
-      return true;
+  if (out_res != NULL) {
+    for (DUIterator_Fast imax, i = out_res->fast_outs(imax); i < imax; i++) {
+      Node *u = out_res->fast_out(i);
+      if (!u->is_LoadBarrier() || u->in(Similar) != out_res) {
+        return true;
+      }
     }
   }
-
   return false;
 }
 
-// == Accesses ==
-
-Node* ZBarrierSetC2::make_cas_loadbarrier(C2AtomicParseAccess& access) const {
-  assert(!UseCompressedOops, "Not allowed");
-  CompareAndSwapNode* cas = (CompareAndSwapNode*)access.raw_access();
-  PhaseGVN& gvn = access.gvn();
-  Compile* C = Compile::current();
-  GraphKit* kit = access.kit();
-
-  Node* in_ctrl     = cas->in(MemNode::Control);
-  Node* in_mem      = cas->in(MemNode::Memory);
-  Node* in_adr      = cas->in(MemNode::Address);
-  Node* in_val      = cas->in(MemNode::ValueIn);
-  Node* in_expected = cas->in(LoadStoreConditionalNode::ExpectedIn);
-
-  float likely                   = PROB_LIKELY(0.999);
-
-  const TypePtr *adr_type        = gvn.type(in_adr)->isa_ptr();
-  Compile::AliasType* alias_type = C->alias_type(adr_type);
-  int alias_idx                  = C->get_alias_index(adr_type);
-
-  // Outer check - true: continue, false: load and check
-  Node* region   = new RegionNode(3);
-  Node* phi      = new PhiNode(region, TypeInt::BOOL);
-  Node* phi_mem  = new PhiNode(region, Type::MEMORY, adr_type);
-
-  // Inner check - is the healed ref equal to the expected
-  Node* region2  = new RegionNode(3);
-  Node* phi2     = new PhiNode(region2, TypeInt::BOOL);
-  Node* phi_mem2 = new PhiNode(region2, Type::MEMORY, adr_type);
-
-  // CAS node returns 0 or 1
-  Node* cmp     = gvn.transform(new CmpINode(cas, kit->intcon(0)));
-  Node* bol     = gvn.transform(new BoolNode(cmp, BoolTest::ne))->as_Bool();
-  IfNode* iff   = gvn.transform(new IfNode(in_ctrl, bol, likely, COUNT_UNKNOWN))->as_If();
-  Node* then    = gvn.transform(new IfTrueNode(iff));
-  Node* elsen   = gvn.transform(new IfFalseNode(iff));
-
-  Node* scmemproj1   = gvn.transform(new SCMemProjNode(cas));
-
-  kit->set_memory(scmemproj1, alias_idx);
-  phi_mem->init_req(1, scmemproj1);
-  phi_mem2->init_req(2, scmemproj1);
-
-  // CAS fail - reload and heal oop
-  Node* reload      = kit->make_load(elsen, in_adr, TypeOopPtr::BOTTOM, T_OBJECT, MemNode::unordered);
-  Node* barrier     = gvn.transform(new LoadBarrierNode(C, elsen, scmemproj1, reload, in_adr, false, true, false));
-  Node* barrierctrl = gvn.transform(new ProjNode(barrier, LoadBarrierNode::Control));
-  Node* barrierdata = gvn.transform(new ProjNode(barrier, LoadBarrierNode::Oop));
-
-  // Check load
-  Node* tmpX    = gvn.transform(new CastP2XNode(NULL, barrierdata));
-  Node* in_expX = gvn.transform(new CastP2XNode(NULL, in_expected));
-  Node* cmp2    = gvn.transform(new CmpXNode(tmpX, in_expX));
-  Node *bol2    = gvn.transform(new BoolNode(cmp2, BoolTest::ne))->as_Bool();
-  IfNode* iff2  = gvn.transform(new IfNode(barrierctrl, bol2, likely, COUNT_UNKNOWN))->as_If();
-  Node* then2   = gvn.transform(new IfTrueNode(iff2));
-  Node* elsen2  = gvn.transform(new IfFalseNode(iff2));
-
-  // redo CAS
-  Node* cas2       = gvn.transform(new CompareAndSwapPNode(elsen2, kit->memory(alias_idx), in_adr, in_val, in_expected, cas->order()));
-  Node* scmemproj2 = gvn.transform(new SCMemProjNode(cas2));
-  kit->set_control(elsen2);
-  kit->set_memory(scmemproj2, alias_idx);
-
-  // Merge inner flow - check if healed oop was equal too expected.
-  region2->set_req(1, kit->control());
-  region2->set_req(2, then2);
-  phi2->set_req(1, cas2);
-  phi2->set_req(2, kit->intcon(0));
-  phi_mem2->init_req(1, scmemproj2);
-  kit->set_memory(phi_mem2, alias_idx);
-
-  // Merge outer flow - then check if first CAS succeeded
-  region->set_req(1, then);
-  region->set_req(2, region2);
-  phi->set_req(1, kit->intcon(1));
-  phi->set_req(2, phi2);
-  phi_mem->init_req(2, phi_mem2);
-  kit->set_memory(phi_mem, alias_idx);
-
-  gvn.transform(region2);
-  gvn.transform(phi2);
-  gvn.transform(phi_mem2);
-  gvn.transform(region);
-  gvn.transform(phi);
-  gvn.transform(phi_mem);
-
-  kit->set_control(region);
-  kit->insert_mem_bar(Op_MemBarCPUOrder);
-
-  return phi;
-}
-
-Node* ZBarrierSetC2::make_cmpx_loadbarrier(C2AtomicParseAccess& access) const {
-  CompareAndExchangePNode* cmpx = (CompareAndExchangePNode*)access.raw_access();
-  GraphKit* kit = access.kit();
-  PhaseGVN& gvn = kit->gvn();
-  Compile* C = Compile::current();
-
-  Node* in_ctrl     = cmpx->in(MemNode::Control);
-  Node* in_mem      = cmpx->in(MemNode::Memory);
-  Node* in_adr      = cmpx->in(MemNode::Address);
-  Node* in_val      = cmpx->in(MemNode::ValueIn);
-  Node* in_expected = cmpx->in(LoadStoreConditionalNode::ExpectedIn);
-
-  float likely                   = PROB_LIKELY(0.999);
-
-  const TypePtr *adr_type        = cmpx->get_ptr_type();
-  Compile::AliasType* alias_type = C->alias_type(adr_type);
-  int alias_idx                  = C->get_alias_index(adr_type);
-
-  // Outer check - true: continue, false: load and check
-  Node* region  = new RegionNode(3);
-  Node* phi     = new PhiNode(region, adr_type);
-
-  // Inner check - is the healed ref equal to the expected
-  Node* region2 = new RegionNode(3);
-  Node* phi2    = new PhiNode(region2, adr_type);
-
-  // Check if cmpx succeeded
-  Node* cmp     = gvn.transform(new CmpPNode(cmpx, in_expected));
-  Node* bol     = gvn.transform(new BoolNode(cmp, BoolTest::eq))->as_Bool();
-  IfNode* iff   = gvn.transform(new IfNode(in_ctrl, bol, likely, COUNT_UNKNOWN))->as_If();
-  Node* then    = gvn.transform(new IfTrueNode(iff));
-  Node* elsen   = gvn.transform(new IfFalseNode(iff));
-
-  Node* scmemproj1  = gvn.transform(new SCMemProjNode(cmpx));
-  kit->set_memory(scmemproj1, alias_idx);
-
-  // CAS fail - reload and heal oop
-  Node* reload      = kit->make_load(elsen, in_adr, TypeOopPtr::BOTTOM, T_OBJECT, MemNode::unordered);
-  Node* barrier     = gvn.transform(new LoadBarrierNode(C, elsen, scmemproj1, reload, in_adr, false, true, false));
-  Node* barrierctrl = gvn.transform(new ProjNode(barrier, LoadBarrierNode::Control));
-  Node* barrierdata = gvn.transform(new ProjNode(barrier, LoadBarrierNode::Oop));
-
-  // Check load
-  Node* tmpX    = gvn.transform(new CastP2XNode(NULL, barrierdata));
-  Node* in_expX = gvn.transform(new CastP2XNode(NULL, in_expected));
-  Node* cmp2    = gvn.transform(new CmpXNode(tmpX, in_expX));
-  Node *bol2    = gvn.transform(new BoolNode(cmp2, BoolTest::ne))->as_Bool();
-  IfNode* iff2  = gvn.transform(new IfNode(barrierctrl, bol2, likely, COUNT_UNKNOWN))->as_If();
-  Node* then2   = gvn.transform(new IfTrueNode(iff2));
-  Node* elsen2  = gvn.transform(new IfFalseNode(iff2));
-
-  // Redo CAS
-  Node* cmpx2      = gvn.transform(new CompareAndExchangePNode(elsen2, kit->memory(alias_idx), in_adr, in_val, in_expected, adr_type, cmpx->get_ptr_type(), cmpx->order()));
-  Node* scmemproj2 = gvn.transform(new SCMemProjNode(cmpx2));
-  kit->set_control(elsen2);
-  kit->set_memory(scmemproj2, alias_idx);
-
-  // Merge inner flow - check if healed oop was equal too expected.
-  region2->set_req(1, kit->control());
-  region2->set_req(2, then2);
-  phi2->set_req(1, cmpx2);
-  phi2->set_req(2, barrierdata);
-
-  // Merge outer flow - then check if first cas succeeded
-  region->set_req(1, then);
-  region->set_req(2, region2);
-  phi->set_req(1, cmpx);
-  phi->set_req(2, phi2);
-
-  gvn.transform(region2);
-  gvn.transform(phi2);
-  gvn.transform(region);
-  gvn.transform(phi);
-
-  kit->set_control(region);
-  kit->set_memory(in_mem, alias_idx);
-  kit->insert_mem_bar(Op_MemBarCPUOrder);
-
-  return phi;
-}
-
-Node* ZBarrierSetC2::load_barrier(GraphKit* kit, Node* val, Node* adr, bool weak, bool writeback, bool oop_reload_allowed) const {
-  PhaseGVN& gvn = kit->gvn();
-  Node* barrier = new LoadBarrierNode(Compile::current(), kit->control(), kit->memory(TypeRawPtr::BOTTOM), val, adr, weak, writeback, oop_reload_allowed);
-  Node* transformed_barrier = gvn.transform(barrier);
-
-  if (transformed_barrier->is_LoadBarrier()) {
-    if (barrier == transformed_barrier) {
-      kit->set_control(gvn.transform(new ProjNode(barrier, LoadBarrierNode::Control)));
-    }
-    Node* result = gvn.transform(new ProjNode(transformed_barrier, LoadBarrierNode::Oop));
-    return result;
-  } else {
-    return val;
-  }
-}
-
 static bool barrier_needed(C2Access& access) {
   return ZBarrierSet::barrier_needed(access.decorators(), access.type());
 }
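
The deleted make_cas_loadbarrier and make_cmpx_loadbarrier built the entire retry diamond in the ideal graph at parse time. The control flow they expressed reduces to the following standalone sketch (plain integers instead of colored pointers; heal is a trivial stand-in for the ZGC slow path, so the retry is purely illustrative here):

#include <atomic>
#include <cstdint>
#include <cstdio>

using oop = uintptr_t;

// In real ZGC the reload goes through the load barrier, which may rewrite
// (heal) the field so that a retry can succeed even though the first CAS
// failed only because of pointer coloring.
static oop heal(std::atomic<oop>& field) { return field.load(); }

static bool cas_with_healing(std::atomic<oop>& field, oop expected, oop new_val) {
  oop e = expected;
  if (field.compare_exchange_strong(e, new_val)) {
    return true;                                      // fast path: CAS succeeded
  }
  if (heal(field) != expected) {
    return false;                                     // the value really differs
  }
  e = expected;
  return field.compare_exchange_strong(e, new_val);   // retry after healing
}

int main() {
  std::atomic<oop> field{42};
  std::printf("swapped=%d\n", cas_with_healing(field, 42, 7));
  std::printf("value=%lu\n", (unsigned long)field.load());
  return 0;
}

With late insertion this shape no longer needs to exist as parse-time IR: the atomic node is merely tagged (set_has_barrier) and later matched by the specialized z_* rules.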
@@ -695,266 +475,58 @@
   }
 
   bool weak = (access.decorators() & ON_WEAK_OOP_REF) != 0;
-
-  assert(access.is_parse_access(), "entry not supported at optimization time");
-  C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
-  GraphKit* kit = parse_access.kit();
-  PhaseGVN& gvn = kit->gvn();
-  Node* adr = access.addr().node();
-  Node* heap_base_oop = access.base();
-  bool unsafe = (access.decorators() & C2_UNSAFE_ACCESS) != 0;
-  if (unsafe) {
-    if (!ZVerifyLoadBarriers) {
-      p = load_barrier(kit, p, adr);
-    } else {
-      if (!TypePtr::NULL_PTR->higher_equal(gvn.type(heap_base_oop))) {
-        p = load_barrier(kit, p, adr);
-      } else {
-        IdealKit ideal(kit);
-        IdealVariable res(ideal);
-#define __ ideal.
-        __ declarations_done();
-        __ set(res, p);
-        __ if_then(heap_base_oop, BoolTest::ne, kit->null(), PROB_UNLIKELY(0.999)); {
-          kit->sync_kit(ideal);
-          p = load_barrier(kit, p, adr);
-          __ set(res, p);
-          __ sync_kit(kit);
-        } __ end_if();
-        kit->final_sync(ideal);
-        p = __ value(res);
-#undef __
-      }
-    }
-    return p;
-  } else {
-    return load_barrier(parse_access.kit(), p, access.addr().node(), weak, true, true);
+  if (p->isa_Load()) {
+    load_set_barrier(p->as_Load(), weak);
   }
+  return p;
 }
 
 Node* ZBarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                     Node* new_val, const Type* val_type) const {
   Node* result = BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, val_type);
-  if (!barrier_needed(access)) {
-    return result;
+  LoadStoreNode* lsn = result->as_LoadStore();
+  if (barrier_needed(access)) {
+    lsn->set_has_barrier();
   }
-
-  access.set_needs_pinning(false);
-  return make_cmpx_loadbarrier(access);
+  return lsn;
 }
 
 Node* ZBarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                      Node* new_val, const Type* value_type) const {
   Node* result = BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
-  if (!barrier_needed(access)) {
-    return result;
+  LoadStoreNode* lsn = result->as_LoadStore();
+  if (barrier_needed(access)) {
+    lsn->set_has_barrier();
   }
-
-  Node* load_store = access.raw_access();
-  bool weak_cas = (access.decorators() & C2_WEAK_CMPXCHG) != 0;
-  bool expected_is_null = (expected_val->get_ptr_type() == TypePtr::NULL_PTR);
-
-  if (!expected_is_null) {
-    if (weak_cas) {
-      access.set_needs_pinning(false);
-      load_store = make_cas_loadbarrier(access);
-    } else {
-      access.set_needs_pinning(false);
-      load_store = make_cas_loadbarrier(access);
-    }
-  }
-
-  return load_store;
+  return lsn;
 }
 
 Node* ZBarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* val_type) const {
   Node* result = BarrierSetC2::atomic_xchg_at_resolved(access, new_val, val_type);
-  if (!barrier_needed(access)) {
-    return result;
+  LoadStoreNode* lsn = result->as_LoadStore();
+  if (barrier_needed(access)) {
+    lsn->set_has_barrier();
   }
-
-  Node* load_store = access.raw_access();
-  Node* adr = access.addr().node();
-
-  assert(access.is_parse_access(), "entry not supported at optimization time");
-  C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
-  return load_barrier(parse_access.kit(), load_store, adr, false, false, false);
+  return lsn;
 }
 
 // == Macro Expansion ==
 
+// Optimized, low spill, loadbarrier variant using stub specialized on register used
 void ZBarrierSetC2::expand_loadbarrier_node(PhaseMacroExpand* phase, LoadBarrierNode* barrier) const {
-  Node* in_ctrl = barrier->in(LoadBarrierNode::Control);
-  Node* in_mem  = barrier->in(LoadBarrierNode::Memory);
-  Node* in_val  = barrier->in(LoadBarrierNode::Oop);
-  Node* in_adr  = barrier->in(LoadBarrierNode::Address);
-
-  Node* out_ctrl = barrier->proj_out(LoadBarrierNode::Control);
-  Node* out_res  = barrier->proj_out(LoadBarrierNode::Oop);
-
-  PhaseIterGVN &igvn = phase->igvn();
-
-  if (ZVerifyLoadBarriers) {
-    igvn.replace_node(out_res, in_val);
-    igvn.replace_node(out_ctrl, in_ctrl);
-    return;
-  }
-
-  if (barrier->can_be_eliminated()) {
-    // Clone and pin the load for this barrier below the dominating
-    // barrier: the load cannot be allowed to float above the
-    // dominating barrier
-    Node* load = in_val;
-
-    if (load->is_Load()) {
-      Node* new_load = load->clone();
-      Node* addp = new_load->in(MemNode::Address);
-      assert(addp->is_AddP() || addp->is_Phi() || addp->is_Load(), "bad address");
-      Node* cast = new CastPPNode(addp, igvn.type(addp), true);
-      Node* ctrl = NULL;
-      Node* similar = barrier->in(LoadBarrierNode::Similar);
-      if (similar->is_Phi()) {
-        // already expanded
-        ctrl = similar->in(0);
-      } else {
-        assert(similar->is_Proj() && similar->in(0)->is_LoadBarrier(), "unexpected graph shape");
-        ctrl = similar->in(0)->as_LoadBarrier()->proj_out(LoadBarrierNode::Control);
-      }
-      assert(ctrl != NULL, "bad control");
-      cast->set_req(0, ctrl);
-      igvn.transform(cast);
-      new_load->set_req(MemNode::Address, cast);
-      igvn.transform(new_load);
-
-      igvn.replace_node(out_res, new_load);
-      igvn.replace_node(out_ctrl, in_ctrl);
-      return;
-    }
-    // cannot eliminate
-  }
-
-  // There are two cases that require the basic loadbarrier
-  // 1) When the writeback of a healed oop must be avoided (swap)
-  // 2) When we must guarantee that no reload of is done (swap, cas, cmpx)
-  if (!barrier->is_writeback()) {
-    assert(!barrier->oop_reload_allowed(), "writeback barriers should be marked as requires oop");
-  }
-
-  if (!barrier->oop_reload_allowed()) {
-    expand_loadbarrier_basic(phase, barrier);
-  } else {
-    expand_loadbarrier_optimized(phase, barrier);
-  }
-}
-
-// Basic loadbarrier using conventional argument passing
-void ZBarrierSetC2::expand_loadbarrier_basic(PhaseMacroExpand* phase, LoadBarrierNode *barrier) const {
   PhaseIterGVN &igvn = phase->igvn();
-
-  Node* in_ctrl = barrier->in(LoadBarrierNode::Control);
-  Node* in_mem  = barrier->in(LoadBarrierNode::Memory);
-  Node* in_val  = barrier->in(LoadBarrierNode::Oop);
-  Node* in_adr  = barrier->in(LoadBarrierNode::Address);
-
-  Node* out_ctrl = barrier->proj_out(LoadBarrierNode::Control);
-  Node* out_res  = barrier->proj_out(LoadBarrierNode::Oop);
-
   float unlikely  = PROB_UNLIKELY(0.999);
-  const Type* in_val_maybe_null_t = igvn.type(in_val);
-
-  Node* jthread = igvn.transform(new ThreadLocalNode());
-  Node* adr = phase->basic_plus_adr(jthread, in_bytes(ZThreadLocalData::address_bad_mask_offset()));
-  Node* bad_mask = igvn.transform(LoadNode::make(igvn, in_ctrl, in_mem, adr, TypeRawPtr::BOTTOM, TypeX_X, TypeX_X->basic_type(), MemNode::unordered));
-  Node* cast = igvn.transform(new CastP2XNode(in_ctrl, in_val));
-  Node* obj_masked = igvn.transform(new AndXNode(cast, bad_mask));
-  Node* cmp = igvn.transform(new CmpXNode(obj_masked, igvn.zerocon(TypeX_X->basic_type())));
-  Node *bol = igvn.transform(new BoolNode(cmp, BoolTest::ne))->as_Bool();
-  IfNode* iff = igvn.transform(new IfNode(in_ctrl, bol, unlikely, COUNT_UNKNOWN))->as_If();
-  Node* then = igvn.transform(new IfTrueNode(iff));
-  Node* elsen = igvn.transform(new IfFalseNode(iff));
-
-  Node* result_region;
-  Node* result_val;
-
-  result_region = new RegionNode(3);
-  result_val = new PhiNode(result_region, TypeInstPtr::BOTTOM);
-
-  result_region->set_req(1, elsen);
-  Node* res = igvn.transform(new CastPPNode(in_val, in_val_maybe_null_t));
-  res->init_req(0, elsen);
-  result_val->set_req(1, res);
-
-  const TypeFunc *tf = load_barrier_Type();
-  Node* call;
-  if (barrier->is_weak()) {
-    call = new CallLeafNode(tf,
-                            ZBarrierSetRuntime::load_barrier_on_weak_oop_field_preloaded_addr(),
-                            "ZBarrierSetRuntime::load_barrier_on_weak_oop_field_preloaded",
-                            TypeRawPtr::BOTTOM);
-  } else {
-    call = new CallLeafNode(tf,
-                            ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(),
-                            "ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded",
-                            TypeRawPtr::BOTTOM);
-  }
-
-  call->init_req(TypeFunc::Control, then);
-  call->init_req(TypeFunc::I_O    , phase->top());
-  call->init_req(TypeFunc::Memory , in_mem);
-  call->init_req(TypeFunc::FramePtr, phase->top());
-  call->init_req(TypeFunc::ReturnAdr, phase->top());
-  call->init_req(TypeFunc::Parms+0, in_val);
-  if (barrier->is_writeback()) {
-    call->init_req(TypeFunc::Parms+1, in_adr);
-  } else {
-    // When slow path is called with a null address, the healed oop will not be written back
-    call->init_req(TypeFunc::Parms+1, igvn.zerocon(T_OBJECT));
-  }
-  call = igvn.transform(call);
-
-  Node* ctrl = igvn.transform(new ProjNode(call, TypeFunc::Control));
-  res = igvn.transform(new ProjNode(call, TypeFunc::Parms));
-  res = igvn.transform(new CheckCastPPNode(ctrl, res, in_val_maybe_null_t));
-
-  result_region->set_req(2, ctrl);
-  result_val->set_req(2, res);
-
-  result_region = igvn.transform(result_region);
-  result_val = igvn.transform(result_val);
-
-  if (out_ctrl != NULL) { // Added if cond
-    igvn.replace_node(out_ctrl, result_region);
-  }
-  igvn.replace_node(out_res, result_val);
-}
-
-// Optimized, low spill, loadbarrier variant using stub specialized on register used
-void ZBarrierSetC2::expand_loadbarrier_optimized(PhaseMacroExpand* phase, LoadBarrierNode *barrier) const {
-  PhaseIterGVN &igvn = phase->igvn();
-#ifdef PRINT_NODE_TRAVERSALS
-  Node* preceding_barrier_node = barrier->in(LoadBarrierNode::Oop);
-#endif
 
   Node* in_ctrl = barrier->in(LoadBarrierNode::Control);
   Node* in_mem = barrier->in(LoadBarrierNode::Memory);
   Node* in_val = barrier->in(LoadBarrierNode::Oop);
   Node* in_adr = barrier->in(LoadBarrierNode::Address);
 
-  Node* out_ctrl = barrier->proj_out(LoadBarrierNode::Control);
+  Node* out_ctrl = barrier->proj_out_or_null(LoadBarrierNode::Control);
   Node* out_res = barrier->proj_out(LoadBarrierNode::Oop);
 
   assert(barrier->in(LoadBarrierNode::Oop) != NULL, "oop to loadbarrier node cannot be null");
 
-#ifdef PRINT_NODE_TRAVERSALS
-  tty->print("\n\n\nBefore barrier optimization:\n");
-  traverse(barrier, out_ctrl, out_res, -1);
-
-  tty->print("\nBefore barrier optimization:  preceding_barrier_node\n");
-  traverse(preceding_barrier_node, out_ctrl, out_res, -1);
-#endif
-
-  float unlikely  = PROB_UNLIKELY(0.999);
-
   Node* jthread = igvn.transform(new ThreadLocalNode());
   Node* adr = phase->basic_plus_adr(jthread, in_bytes(ZThreadLocalData::address_bad_mask_offset()));
   Node* bad_mask = igvn.transform(LoadNode::make(igvn, in_ctrl, in_mem, adr,
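
The access hooks in the hunk above no longer emit barrier IR at all: loads get their barrier_data set, atomics get set_has_barrier(), and the graph keeps its plain shape until expansion. A sketch of that tagging pattern with stand-in types (not HotSpot's):

#include <cstdio>

struct Node { virtual ~Node() {} };

struct LoadNode : Node {
  bool require = false, weak = false;   // models the barrier_data flags
};

struct LoadStoreNode : Node {
  bool has_barrier = false;
  void set_has_barrier() { has_barrier = true; }   // models the new marker
};

static Node* load_at_resolved(LoadNode* p, bool barrier_needed, bool weak) {
  if (barrier_needed) {   // only record the requirement; no control flow yet
    p->require = true;
    p->weak = weak;
  }
  return p;
}

int main() {
  LoadNode load;
  load_at_resolved(&load, /*barrier_needed=*/true, /*weak=*/false);
  LoadStoreNode cas;
  cas.set_has_barrier();
  std::printf("load: require=%d weak=%d; cas: barrier=%d\n",
              load.require, load.weak, cas.has_barrier);
  return 0;
}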
@@ -968,17 +540,9 @@
   Node* then = igvn.transform(new IfTrueNode(iff));
   Node* elsen = igvn.transform(new IfFalseNode(iff));
 
-  Node* slow_path_surrogate;
-  if (!barrier->is_weak()) {
-    slow_path_surrogate = igvn.transform(new LoadBarrierSlowRegNode(then, in_mem, in_adr, in_val->adr_type(),
-                                                                    (const TypePtr*) in_val->bottom_type(), MemNode::unordered));
-  } else {
-    slow_path_surrogate = igvn.transform(new LoadBarrierWeakSlowRegNode(then, in_mem, in_adr, in_val->adr_type(),
-                                                                        (const TypePtr*) in_val->bottom_type(), MemNode::unordered));
-  }
+  Node* new_loadp = igvn.transform(new LoadBarrierSlowRegNode(then, in_mem, in_adr, in_val->adr_type(),
+                                                                    (const TypePtr*) in_val->bottom_type(), MemNode::unordered, barrier->is_weak()));
 
-  Node *new_loadp;
-  new_loadp = slow_path_surrogate;
   // Create the final region/phi pair to converge cntl/data paths to downstream code
   Node* result_region = igvn.transform(new RegionNode(3));
   result_region->set_req(1, then);
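
What the expansion builds here is the classic ZGC fast path: test the loaded pointer against the thread-local bad mask and branch to the register-specialized stub (LoadBarrierSlowReg) only on a bad color. A conceptual model with made-up mask and color values:

#include <cstdint>
#include <cstdio>

static uintptr_t address_bad_mask = 0x4;   // stand-in for ZThreadLocalData's mask

static uintptr_t slow_path_heal(uintptr_t ptr) {
  return ptr & ~address_bad_mask;          // pretend healing just clears the bit
}

static uintptr_t load_barrier(uintptr_t ptr) {
  if ((ptr & address_bad_mask) != 0) {     // the unlikely branch in the IR above
    ptr = slow_path_heal(ptr);             // LoadBarrierSlowReg models this call
  }
  return ptr;
}

int main() {
  std::printf("%lx\n", (unsigned long)load_barrier(0x1000));        // good color
  std::printf("%lx\n", (unsigned long)load_barrier(0x1000 | 0x4));  // bad color
  return 0;
}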
@@ -988,37 +552,28 @@
   result_phi->set_req(1, new_loadp);
   result_phi->set_req(2, barrier->in(LoadBarrierNode::Oop));
 
-  // Finally, connect the original outputs to the barrier region and phi to complete the expansion/substitution
-  // igvn.replace_node(out_ctrl, result_region);
-  if (out_ctrl != NULL) { // added if cond
+  if (out_ctrl != NULL) {
     igvn.replace_node(out_ctrl, result_region);
   }
   igvn.replace_node(out_res, result_phi);
 
   assert(barrier->outcnt() == 0,"LoadBarrier macro node has non-null outputs after expansion!");
 
-#ifdef PRINT_NODE_TRAVERSALS
-  tty->print("\nAfter barrier optimization:  old out_ctrl\n");
-  traverse(out_ctrl, out_ctrl, out_res, -1);
-  tty->print("\nAfter barrier optimization:  old out_res\n");
-  traverse(out_res, out_ctrl, out_res, -1);
-  tty->print("\nAfter barrier optimization:  old barrier\n");
-  traverse(barrier, out_ctrl, out_res, -1);
-  tty->print("\nAfter barrier optimization:  preceding_barrier_node\n");
-  traverse(preceding_barrier_node, result_region, result_phi, -1);
-#endif
+  igvn.remove_dead_node(barrier);
+  igvn.remove_dead_node(out_ctrl);
+  igvn.remove_dead_node(out_res);
 
   assert(is_gc_barrier_node(result_phi), "sanity");
   assert(step_over_gc_barrier(result_phi) == in_val, "sanity");
+
+  phase->C->print_method(PHASE_BARRIER_EXPANSION, 4, barrier->_idx);
 }
 
 bool ZBarrierSetC2::expand_barriers(Compile* C, PhaseIterGVN& igvn) const {
   ZBarrierSetC2State* s = state();
   if (s->load_barrier_count() > 0) {
     PhaseMacroExpand macro(igvn);
-#ifdef ASSERT
-    verify_gc_barriers(false);
-#endif
+
     int skipped = 0;
     while (s->load_barrier_count() > skipped) {
       int load_barrier_count = s->load_barrier_count();
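
The surviving expansion driver walks the registered barriers from the end and counts entries it cannot expand yet as skipped, so the loop terminates once only those remain. A simplified model of that loop shape (eligibility stands in for the real reachability and can_be_eliminated checks):

#include <cstdio>
#include <vector>

struct Barrier { bool eligible; };

static void expand_all(std::vector<Barrier*>& barriers) {
  int skipped = 0;
  while ((int)barriers.size() > skipped) {
    Barrier* b = barriers[barriers.size() - 1 - skipped];
    if (b->eligible) {
      barriers.erase(barriers.end() - 1 - skipped);  // expanded: drop the entry
    } else {
      skipped++;                                     // keep it, but never revisit
    }
  }
  std::printf("left unexpanded: %d\n", skipped);
}

int main() {
  Barrier a{true}, b{false}, c{true};
  std::vector<Barrier*> list{&a, &b, &c};
  expand_all(list);   // prints: left unexpanded: 1
  return 0;
}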
@@ -1059,374 +614,6 @@
   return false;
 }
 
-// == Loop optimization ==
-
-static bool replace_with_dominating_barrier(PhaseIdealLoop* phase, LoadBarrierNode* lb, bool last_round) {
-  PhaseIterGVN &igvn = phase->igvn();
-  Compile* C = Compile::current();
-
-  LoadBarrierNode* lb2 = lb->has_dominating_barrier(phase, false, last_round);
-  if (lb2 == NULL) {
-    return false;
-  }
-
-  if (lb->in(LoadBarrierNode::Oop) != lb2->in(LoadBarrierNode::Oop)) {
-    assert(lb->in(LoadBarrierNode::Address) == lb2->in(LoadBarrierNode::Address), "Invalid address");
-    igvn.replace_input_of(lb, LoadBarrierNode::Similar, lb2->proj_out(LoadBarrierNode::Oop));
-    C->set_major_progress();
-    return false;
-  }
-
-  // That transformation may cause the Similar edge on dominated load barriers to be invalid
-  lb->fix_similar_in_uses(&igvn);
-
-  Node* val = lb->proj_out(LoadBarrierNode::Oop);
-  assert(lb2->has_true_uses(), "Invalid uses");
-  assert(lb2->in(LoadBarrierNode::Oop) == lb->in(LoadBarrierNode::Oop), "Invalid oop");
-  phase->lazy_update(lb, lb->in(LoadBarrierNode::Control));
-  phase->lazy_replace(lb->proj_out(LoadBarrierNode::Control), lb->in(LoadBarrierNode::Control));
-  igvn.replace_node(val, lb2->proj_out(LoadBarrierNode::Oop));
-
-  return true;
-}
-
-static Node* find_dominating_memory(PhaseIdealLoop* phase, Node* mem, Node* dom, int i) {
-  assert(dom->is_Region() || i == -1, "");
-
-  Node* m = mem;
-  while(phase->is_dominator(dom, phase->has_ctrl(m) ? phase->get_ctrl(m) : m->in(0))) {
-    if (m->is_Mem()) {
-      assert(m->as_Mem()->adr_type() == TypeRawPtr::BOTTOM, "");
-      m = m->in(MemNode::Memory);
-    } else if (m->is_MergeMem()) {
-      m = m->as_MergeMem()->memory_at(Compile::AliasIdxRaw);
-    } else if (m->is_Phi()) {
-      if (m->in(0) == dom && i != -1) {
-        m = m->in(i);
-        break;
-      } else {
-        m = m->in(LoopNode::EntryControl);
-      }
-    } else if (m->is_Proj()) {
-      m = m->in(0);
-    } else if (m->is_SafePoint() || m->is_MemBar()) {
-      m = m->in(TypeFunc::Memory);
-    } else {
-#ifdef ASSERT
-      m->dump();
-#endif
-      ShouldNotReachHere();
-    }
-  }
-
-  return m;
-}
-
-static LoadBarrierNode* clone_load_barrier(PhaseIdealLoop* phase, LoadBarrierNode* lb, Node* ctl, Node* mem, Node* oop_in) {
-  PhaseIterGVN &igvn = phase->igvn();
-  Compile* C = Compile::current();
-  Node* the_clone = lb->clone();
-  the_clone->set_req(LoadBarrierNode::Control, ctl);
-  the_clone->set_req(LoadBarrierNode::Memory, mem);
-  if (oop_in != NULL) {
-    the_clone->set_req(LoadBarrierNode::Oop, oop_in);
-  }
-
-  LoadBarrierNode* new_lb = the_clone->as_LoadBarrier();
-  igvn.register_new_node_with_optimizer(new_lb);
-  IdealLoopTree *loop = phase->get_loop(new_lb->in(0));
-  phase->set_ctrl(new_lb, new_lb->in(0));
-  phase->set_loop(new_lb, loop);
-  phase->set_idom(new_lb, new_lb->in(0), phase->dom_depth(new_lb->in(0))+1);
-  if (!loop->_child) {
-    loop->_body.push(new_lb);
-  }
-
-  Node* proj_ctl = new ProjNode(new_lb, LoadBarrierNode::Control);
-  igvn.register_new_node_with_optimizer(proj_ctl);
-  phase->set_ctrl(proj_ctl, proj_ctl->in(0));
-  phase->set_loop(proj_ctl, loop);
-  phase->set_idom(proj_ctl, new_lb, phase->dom_depth(new_lb)+1);
-  if (!loop->_child) {
-    loop->_body.push(proj_ctl);
-  }
-
-  Node* proj_oop = new ProjNode(new_lb, LoadBarrierNode::Oop);
-  phase->register_new_node(proj_oop, new_lb);
-
-  if (!new_lb->in(LoadBarrierNode::Similar)->is_top()) {
-    LoadBarrierNode* similar = new_lb->in(LoadBarrierNode::Similar)->in(0)->as_LoadBarrier();
-    if (!phase->is_dominator(similar, ctl)) {
-      igvn.replace_input_of(new_lb, LoadBarrierNode::Similar, C->top());
-    }
-  }
-
-  return new_lb;
-}
-
-static void replace_barrier(PhaseIdealLoop* phase, LoadBarrierNode* lb, Node* new_val) {
-  PhaseIterGVN &igvn = phase->igvn();
-  Node* val = lb->proj_out(LoadBarrierNode::Oop);
-  igvn.replace_node(val, new_val);
-  phase->lazy_update(lb, lb->in(LoadBarrierNode::Control));
-  phase->lazy_replace(lb->proj_out(LoadBarrierNode::Control), lb->in(LoadBarrierNode::Control));
-}
-
-static bool split_barrier_thru_phi(PhaseIdealLoop* phase, LoadBarrierNode* lb) {
-  PhaseIterGVN &igvn = phase->igvn();
-  Compile* C = Compile::current();
-
-  if (lb->in(LoadBarrierNode::Oop)->is_Phi()) {
-    Node* oop_phi = lb->in(LoadBarrierNode::Oop);
-
-    if ((oop_phi->req() != 3) || (oop_phi->in(2) == oop_phi)) {
-      // Ignore phis with only one input
-      return false;
-    }
-
-    if (phase->is_dominator(phase->get_ctrl(lb->in(LoadBarrierNode::Address)),
-                            oop_phi->in(0)) && phase->get_ctrl(lb->in(LoadBarrierNode::Address)) != oop_phi->in(0)) {
-      // That transformation may cause the Similar edge on dominated load barriers to be invalid
-      lb->fix_similar_in_uses(&igvn);
-
-      RegionNode* region = oop_phi->in(0)->as_Region();
-
-      int backedge = LoopNode::LoopBackControl;
-      if (region->is_Loop() && region->in(backedge)->is_Proj() && region->in(backedge)->in(0)->is_If()) {
-        Node* c = region->in(backedge)->in(0)->in(0);
-        assert(c->unique_ctrl_out() == region->in(backedge)->in(0), "");
-        Node* oop = lb->in(LoadBarrierNode::Oop)->in(backedge);
-        Node* oop_c = phase->has_ctrl(oop) ? phase->get_ctrl(oop) : oop;
-        if (!phase->is_dominator(oop_c, c)) {
-          return false;
-        }
-      }
-
-      // If the node on the backedge above the phi is the node itself - we have a self loop.
-      // Don't clone - this will be folded later.
-      if (oop_phi->in(LoopNode::LoopBackControl) == lb->proj_out(LoadBarrierNode::Oop)) {
-        return false;
-      }
-
-      bool is_strip_mined = region->is_CountedLoop() && region->as_CountedLoop()->is_strip_mined();
-      Node *phi = oop_phi->clone();
-
-      for (uint i = 1; i < region->req(); i++) {
-        Node* ctrl = region->in(i);
-        if (ctrl != C->top()) {
-          assert(!phase->is_dominator(ctrl, region) || region->is_Loop(), "");
-
-          Node* mem = lb->in(LoadBarrierNode::Memory);
-          Node* m = find_dominating_memory(phase, mem, region, i);
-
-          if (region->is_Loop() && i == LoopNode::LoopBackControl && ctrl->is_Proj() && ctrl->in(0)->is_If()) {
-            ctrl = ctrl->in(0)->in(0);
-          } else if (region->is_Loop() && is_strip_mined) {
-            // If this is a strip mined loop, control must move above OuterStripMinedLoop
-            assert(i == LoopNode::EntryControl, "check");
-            assert(ctrl->is_OuterStripMinedLoop(), "sanity");
-            ctrl = ctrl->as_OuterStripMinedLoop()->in(LoopNode::EntryControl);
-          }
-
-          LoadBarrierNode* new_lb = clone_load_barrier(phase, lb, ctrl, m, lb->in(LoadBarrierNode::Oop)->in(i));
-          Node* out_ctrl = new_lb->proj_out(LoadBarrierNode::Control);
-
-          if (is_strip_mined && (i == LoopNode::EntryControl)) {
-            assert(region->in(i)->is_OuterStripMinedLoop(), "");
-            igvn.replace_input_of(region->in(i), i, out_ctrl);
-            phase->set_idom(region->in(i), out_ctrl, phase->dom_depth(out_ctrl));
-          } else if (ctrl == region->in(i)) {
-            igvn.replace_input_of(region, i, out_ctrl);
-            // Only update the idom if is the loop entry we are updating
-            // - A loop backedge doesn't change the idom
-            if (region->is_Loop() && i == LoopNode::EntryControl) {
-              phase->set_idom(region, out_ctrl, phase->dom_depth(out_ctrl));
-            }
-          } else {
-            Node* iff = region->in(i)->in(0);
-            igvn.replace_input_of(iff, 0, out_ctrl);
-            phase->set_idom(iff, out_ctrl, phase->dom_depth(out_ctrl)+1);
-          }
-          phi->set_req(i, new_lb->proj_out(LoadBarrierNode::Oop));
-        }
-      }
-      phase->register_new_node(phi, region);
-      replace_barrier(phase, lb, phi);
-
-      if (region->is_Loop()) {
-        // Load barrier moved to the back edge of the Loop may now
-        // have a safepoint on the path to the barrier on the Similar
-        // edge
-        igvn.replace_input_of(phi->in(LoopNode::LoopBackControl)->in(0), LoadBarrierNode::Similar, C->top());
-        Node* head = region->in(LoopNode::EntryControl);
-        phase->set_idom(region, head, phase->dom_depth(head)+1);
-        phase->recompute_dom_depth();
-        if (head->is_CountedLoop() && head->as_CountedLoop()->is_main_loop()) {
-          head->as_CountedLoop()->set_normal_loop();
-        }
-      }
-
-      return true;
-    }
-  }
-
-  return false;
-}
-
-static bool move_out_of_loop(PhaseIdealLoop* phase, LoadBarrierNode* lb) {
-  PhaseIterGVN &igvn = phase->igvn();
-  IdealLoopTree *lb_loop = phase->get_loop(lb->in(0));
-  if (lb_loop != phase->ltree_root() && !lb_loop->_irreducible) {
-    Node* oop_ctrl = phase->get_ctrl(lb->in(LoadBarrierNode::Oop));
-    IdealLoopTree *oop_loop = phase->get_loop(oop_ctrl);
-    IdealLoopTree* adr_loop = phase->get_loop(phase->get_ctrl(lb->in(LoadBarrierNode::Address)));
-    if (!lb_loop->is_member(oop_loop) && !lb_loop->is_member(adr_loop)) {
-      // That transformation may cause the Similar edge on dominated load barriers to be invalid
-      lb->fix_similar_in_uses(&igvn);
-
-      Node* head = lb_loop->_head;
-      assert(head->is_Loop(), "");
-
-      if (phase->is_dominator(head, oop_ctrl)) {
-        assert(oop_ctrl->Opcode() == Op_CProj && oop_ctrl->in(0)->Opcode() == Op_NeverBranch, "");
-        assert(lb_loop->is_member(phase->get_loop(oop_ctrl->in(0)->in(0))), "");
-        return false;
-      }
-
-      if (head->is_CountedLoop()) {
-        CountedLoopNode* cloop = head->as_CountedLoop();
-        if (cloop->is_main_loop()) {
-          cloop->set_normal_loop();
-        }
-        // When we are moving barrier out of a counted loop,
-        // make sure we move it all the way out of the strip mined outer loop.
-        if (cloop->is_strip_mined()) {
-          head = cloop->outer_loop();
-        }
-      }
-
-      Node* mem = lb->in(LoadBarrierNode::Memory);
-      Node* m = find_dominating_memory(phase, mem, head, -1);
-
-      LoadBarrierNode* new_lb = clone_load_barrier(phase, lb, head->in(LoopNode::EntryControl), m, NULL);
-
-      assert(phase->idom(head) == head->in(LoopNode::EntryControl), "");
-      Node* proj_ctl = new_lb->proj_out(LoadBarrierNode::Control);
-      igvn.replace_input_of(head, LoopNode::EntryControl, proj_ctl);
-      phase->set_idom(head, proj_ctl, phase->dom_depth(proj_ctl) + 1);
-
-      replace_barrier(phase, lb, new_lb->proj_out(LoadBarrierNode::Oop));
-
-      phase->recompute_dom_depth();
-
-      return true;
-    }
-  }
-
-  return false;
-}
-
-static bool common_barriers(PhaseIdealLoop* phase, LoadBarrierNode* lb) {
-  PhaseIterGVN &igvn = phase->igvn();
-  Node* in_val = lb->in(LoadBarrierNode::Oop);
-  for (DUIterator_Fast imax, i = in_val->fast_outs(imax); i < imax; i++) {
-    Node* u = in_val->fast_out(i);
-    if (u != lb && u->is_LoadBarrier() && u->as_LoadBarrier()->has_true_uses()) {
-      Node* this_ctrl = lb->in(LoadBarrierNode::Control);
-      Node* other_ctrl = u->in(LoadBarrierNode::Control);
-
-      Node* lca = phase->dom_lca(this_ctrl, other_ctrl);
-      Node* proj1 = NULL;
-      Node* proj2 = NULL;
-      bool ok = (lb->in(LoadBarrierNode::Address) == u->in(LoadBarrierNode::Address));
-
-      while (this_ctrl != lca && ok) {
-        if (this_ctrl->in(0) != NULL &&
-            this_ctrl->in(0)->is_MultiBranch()) {
-          if (this_ctrl->in(0)->in(0) == lca) {
-            assert(proj1 == NULL, "");
-            assert(this_ctrl->is_Proj(), "");
-            proj1 = this_ctrl;
-          } else if (!(this_ctrl->in(0)->is_If() && this_ctrl->as_Proj()->is_uncommon_trap_if_pattern(Deoptimization::Reason_none))) {
-            ok = false;
-          }
-        }
-        this_ctrl = phase->idom(this_ctrl);
-      }
-      while (other_ctrl != lca && ok) {
-        if (other_ctrl->in(0) != NULL &&
-            other_ctrl->in(0)->is_MultiBranch()) {
-          if (other_ctrl->in(0)->in(0) == lca) {
-            assert(other_ctrl->is_Proj(), "");
-            assert(proj2 == NULL, "");
-            proj2 = other_ctrl;
-          } else if (!(other_ctrl->in(0)->is_If() && other_ctrl->as_Proj()->is_uncommon_trap_if_pattern(Deoptimization::Reason_none))) {
-            ok = false;
-          }
-        }
-        other_ctrl = phase->idom(other_ctrl);
-      }
-      assert(proj1 == NULL || proj2 == NULL || proj1->in(0) == proj2->in(0), "");
-      if (ok && proj1 && proj2 && proj1 != proj2 && proj1->in(0)->is_If()) {
-        // That transformation may cause the Similar edge on dominated load barriers to be invalid
-        lb->fix_similar_in_uses(&igvn);
-        u->as_LoadBarrier()->fix_similar_in_uses(&igvn);
-
-        Node* split = lca->unique_ctrl_out();
-        assert(split->in(0) == lca, "");
-
-        Node* mem = lb->in(LoadBarrierNode::Memory);
-        Node* m = find_dominating_memory(phase, mem, split, -1);
-        LoadBarrierNode* new_lb = clone_load_barrier(phase, lb, lca, m, NULL);
-
-        Node* proj_ctl = new_lb->proj_out(LoadBarrierNode::Control);
-        igvn.replace_input_of(split, 0, new_lb->proj_out(LoadBarrierNode::Control));
-        phase->set_idom(split, proj_ctl, phase->dom_depth(proj_ctl)+1);
-
-        Node* proj_oop = new_lb->proj_out(LoadBarrierNode::Oop);
-        replace_barrier(phase, lb, proj_oop);
-        replace_barrier(phase, u->as_LoadBarrier(), proj_oop);
-
-        phase->recompute_dom_depth();
-
-        return true;
-      }
-    }
-  }
-
-  return false;
-}
-
-void ZBarrierSetC2::loop_optimize_gc_barrier(PhaseIdealLoop* phase, Node* node, bool last_round) {
-  if (!Compile::current()->directive()->ZOptimizeLoadBarriersOption) {
-    return;
-  }
-
-  if (!node->is_LoadBarrier()) {
-    return;
-  }
-
-  if (!node->as_LoadBarrier()->has_true_uses()) {
-    return;
-  }
-
-  if (replace_with_dominating_barrier(phase, node->as_LoadBarrier(), last_round)) {
-    return;
-  }
-
-  if (split_barrier_thru_phi(phase, node->as_LoadBarrier())) {
-    return;
-  }
-
-  if (move_out_of_loop(phase, node->as_LoadBarrier())) {
-    return;
-  }
-
-  if (common_barriers(phase, node->as_LoadBarrier())) {
-    return;
-  }
-}
-
 Node* ZBarrierSetC2::step_over_gc_barrier(Node* c) const {
   Node* node = c;
 
@@ -1444,7 +631,7 @@
   if (node->is_Phi()) {
     PhiNode* phi = node->as_Phi();
     Node* n = phi->in(1);
-    if (n != NULL && (n->is_LoadBarrierSlowReg() ||  n->is_LoadBarrierWeakSlowReg())) {
+    if (n != NULL && n->is_LoadBarrierSlowReg()) {
       assert(c == node, "projections from step 1 should only be seen before macro expansion");
       return phi->in(2);
     }
@@ -1458,35 +645,76 @@
 }
 
 bool ZBarrierSetC2::final_graph_reshaping(Compile* compile, Node* n, uint opcode) const {
-  if (opcode != Op_LoadBarrierSlowReg &&
-      opcode != Op_LoadBarrierWeakSlowReg) {
-    return false;
-  }
-
+  switch (opcode) {
+    case Op_LoadBarrier:
+      assert(0, "There should be no load barriers left");
+    case Op_ZGetAndSetP:
+    case Op_ZCompareAndExchangeP:
+    case Op_ZCompareAndSwapP:
+    case Op_ZWeakCompareAndSwapP:
+    case Op_LoadBarrierSlowReg:
 #ifdef ASSERT
-  if (VerifyOptoOopOffsets) {
-    MemNode* mem  = n->as_Mem();
-    // Check to see if address types have grounded out somehow.
-    const TypeInstPtr* tp = mem->in(MemNode::Address)->bottom_type()->isa_instptr();
-    ciInstanceKlass* k = tp->klass()->as_instance_klass();
-    bool oop_offset_is_sane = k->contains_field_offset(tp->offset());
-    assert(!tp || oop_offset_is_sane, "");
+      if (VerifyOptoOopOffsets) {
+        MemNode *mem = n->as_Mem();
+        // Check to see if address types have grounded out somehow.
+        const TypeInstPtr *tp = mem->in(MemNode::Address)->bottom_type()->isa_instptr();
+        ciInstanceKlass *k = tp->klass()->as_instance_klass();
+        bool oop_offset_is_sane = k->contains_field_offset(tp->offset());
+        assert(!tp || oop_offset_is_sane, "");
+      }
+#endif
+      return true;
+    default:
+      return false;
   }
-#endif
-
-  return true;
 }
 
 bool ZBarrierSetC2::matcher_find_shared_visit(Matcher* matcher, Matcher::MStack& mstack, Node* n, uint opcode, bool& mem_op, int& mem_addr_idx) const {
-  if (opcode == Op_CallLeaf &&
-      (n->as_Call()->entry_point() == ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr() ||
-       n->as_Call()->entry_point() == ZBarrierSetRuntime::load_barrier_on_weak_oop_field_preloaded_addr())) {
-    mem_op = true;
-    mem_addr_idx = TypeFunc::Parms + 1;
-    return true;
+  switch (opcode) {
+    case Op_CallLeaf:
+      if (n->as_Call()->entry_point() == ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr() ||
+          n->as_Call()->entry_point() == ZBarrierSetRuntime::load_barrier_on_weak_oop_field_preloaded_addr()) {
+        mem_op = true;
+        mem_addr_idx = TypeFunc::Parms + 1;
+        return true;
+      }
+      return false;
+    default:
+      return false;
   }
+}
 
-  return false;
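+// The Z atomic nodes carry an extra keepalive input. Pack it, together with the
+// regular operands, into Binary pairs so the matcher sees the usual two-operand
+// shape, then trim the now-redundant edges.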
+bool ZBarrierSetC2::matcher_find_shared_post_visit(Matcher* matcher, Node* n, uint opcode) const {
+  switch (opcode) {
+    case Op_ZCompareAndExchangeP:
+    case Op_ZCompareAndSwapP:
+    case Op_ZWeakCompareAndSwapP: {
+      Node *mem = n->in(MemNode::Address);
+      Node *keepalive = n->in(5);
+      Node *pair1 = new BinaryNode(mem, keepalive);
+
+      Node *newval = n->in(MemNode::ValueIn);
+      Node *oldval = n->in(LoadStoreConditionalNode::ExpectedIn);
+      Node *pair2 = new BinaryNode(oldval, newval);
+
+      n->set_req(MemNode::Address, pair1);
+      n->set_req(MemNode::ValueIn, pair2);
+      n->del_req(5);
+      n->del_req(LoadStoreConditionalNode::ExpectedIn);
+      return true;
+    }
+    case Op_ZGetAndSetP: {
+      Node *keepalive = n->in(4);
+      Node *newval = n->in(MemNode::ValueIn);
+      Node *pair = new BinaryNode(newval, keepalive);
+      n->set_req(MemNode::ValueIn, pair);
+      n->del_req(4);
+      return true;
+    }
+
+    default:
+      return false;
+  }
 }
 
 // == Verification ==
@@ -1519,16 +747,32 @@
 }
 
 void ZBarrierSetC2::verify_gc_barriers(Compile* compile, CompilePhase phase) const {
-  if (phase == BarrierSetC2::BeforeCodeGen) return;
-  bool post_parse = phase == BarrierSetC2::BeforeOptimize;
-  verify_gc_barriers(post_parse);
+  switch (phase) {
+    case BarrierSetC2::BeforeOptimize:
+    case BarrierSetC2::BeforeLateInsertion:
+      assert(state()->load_barrier_count() == 0, "No barriers inserted yet");
+      break;
+    case BarrierSetC2::BeforeMacroExpand:
+      // Barrier placement should be set by now.
+      verify_gc_barriers(false /*post_parse*/);
+      break;
+    case BarrierSetC2::BeforeCodeGen:
+      // Barriers have been fully expanded.
+      assert(state()->load_barrier_count() == 0, "No more macro barriers");
+      break;
+    default:
+      assert(0, "Phase without verification");
+  }
 }
 
+// post_parse implies that there might be load barriers without uses after parsing.
+// That only applies when adding barriers at parse time.
 void ZBarrierSetC2::verify_gc_barriers(bool post_parse) const {
   ZBarrierSetC2State* s = state();
   Compile* C = Compile::current();
   ResourceMark rm;
   VectorSet visited(Thread::current()->resource_area());
+
   for (int i = 0; i < s->load_barrier_count(); i++) {
     LoadBarrierNode* n = s->load_barrier_node(i);
 
@@ -1542,7 +786,7 @@
             n->in(LoadBarrierNode::Similar)->in(0)->in(LoadBarrierNode::Oop) != n->in(LoadBarrierNode::Oop)),
            "broken similar edge");
 
-    assert(post_parse || n->as_LoadBarrier()->has_true_uses(),
+    assert(n->as_LoadBarrier()->has_true_uses(),
            "found unneeded load barrier");
 
     // Several load barrier nodes chained through their Similar edge
@@ -1557,87 +801,834 @@
       Unique_Node_List wq;
       Node* other = n->in(LoadBarrierNode::Similar)->in(0);
       wq.push(n);
-      bool ok = true;
-      bool dom_found = false;
       for (uint next = 0; next < wq.size(); ++next) {
-        Node *n = wq.at(next);
-        assert(n->is_CFG(), "");
-        assert(!n->is_SafePoint(), "");
+        Node *nn = wq.at(next);
+        assert(nn->is_CFG(), "");
+        assert(!nn->is_SafePoint(), "");
 
-        if (n == other) {
+        if (nn == other) {
           continue;
         }
 
-        if (n->is_Region()) {
-          for (uint i = 1; i < n->req(); i++) {
-            Node* m = n->in(i);
+        if (nn->is_Region()) {
+          for (uint i = 1; i < nn->req(); i++) {
+            Node* m = nn->in(i);
             if (m != NULL) {
               wq.push(m);
             }
           }
         } else {
-          Node* m = n->in(0);
+          Node* m = nn->in(0);
           if (m != NULL) {
             wq.push(m);
           }
         }
       }
     }
+  }
+}
 
-    if (ZVerifyLoadBarriers) {
-      if ((n->is_Load() || n->is_LoadStore()) && n->bottom_type()->make_oopptr() != NULL) {
-        visited.Clear();
-        bool found = look_for_barrier(n, post_parse, visited);
-        if (!found) {
-          n->dump(1);
-          n->dump(-3);
-          stringStream ss;
-          C->method()->print_short_name(&ss);
-          tty->print_cr("-%s-", ss.as_string());
-          assert(found, "");
+#endif // end verification code
+
+static void call_catch_cleanup_one(PhaseIdealLoop* phase, LoadNode* load, Node* ctrl);
+
+// This code clones all uses of a load that sits between a call and its catch
+// blocks down to each individual use.
+
+static bool fixup_uses_in_catch(PhaseIdealLoop *phase, Node *start_ctrl, Node *node) {
+
+  if (!phase->has_ctrl(node)) {
+    // This node is floating - doesn't need to be cloned.
+    assert(node != start_ctrl, "check");
+    return false;
+  }
+
+  Node* ctrl = phase->get_ctrl(node);
+  if (ctrl != start_ctrl) {
+    // We are in a successor block - the node is ok.
+    return false; // Unwind
+  }
+
+  // Process successor nodes
+  int outcnt = node->outcnt();
+  for (int i = 0; i < outcnt; i++) {
+    Node* n = node->raw_out(0);
+    assert(!n->is_LoadBarrier(), "Sanity");
+    // Calling recursively, visiting leaves first
+    fixup_uses_in_catch(phase, start_ctrl, n);
+  }
+
+  // Now all successors are outside
+  // - Clone this node to each successor
+  int no_succs = node->outcnt();
+  assert(!node->is_Store(), "Stores not expected here");
+
+  // In some very rare cases a load that doesn't need a barrier will end up here.
+  // Treat it as a LoadP; the insertion of phis will then be done correctly.
+  if (node->is_Load()) {
+    assert(node->as_Load()->barrier_data() == 0, "Sanity");
+    call_catch_cleanup_one(phase, node->as_Load(), phase->get_ctrl(node));
+  } else {
+    for (DUIterator_Fast jmax, i = node->fast_outs(jmax); i < jmax; i++) {
+      Node* use = node->fast_out(i);
+      Node* clone = node->clone();
+      assert(clone->outcnt() == 0, "");
+
+      assert(use->find_edge(node) != -1, "check");
+      phase->igvn().rehash_node_delayed(use);
+      use->replace_edge(node, clone);
+
+      Node* new_ctrl;
+      if (use->is_block_start()) {
+        new_ctrl = use;
+      } else if (use->is_CFG()) {
+        new_ctrl = use->in(0);
+        assert (new_ctrl != NULL, "");
+      } else {
+        new_ctrl = phase->get_ctrl(use);
+      }
+
+      phase->set_ctrl(clone, new_ctrl);
+
+      if (phase->C->directive()->ZTraceLoadBarriersOption) tty->print_cr("  Clone op %i as %i to control %i", node->_idx, clone->_idx, new_ctrl->_idx);
+      phase->igvn().register_new_node_with_optimizer(clone);
+      --i, --jmax;
+    }
+    assert(node->outcnt() == 0, "must be empty now");
+
+    // The node is now dead.
+    phase->igvn().remove_dead_node(node);
+  }
+  return true; // unwind - return if a use was processed
+}
+
+// Clone a load to a specific catch_proj
+static Node* clone_load_to_catchproj(PhaseIdealLoop* phase, Node* load, Node* catch_proj) {
+  Node* cloned_load = load->clone();
+  cloned_load->set_req(0, catch_proj);      // set explicit control
+  phase->set_ctrl(cloned_load, catch_proj); // update
+  if (phase->C->directive()->ZTraceLoadBarriersOption) tty->print_cr("  Clone LOAD %i as %i to control %i", load->_idx, cloned_load->_idx, catch_proj->_idx);
+  phase->igvn().register_new_node_with_optimizer(cloned_load);
+  return cloned_load;
+}
+
+static Node* get_dominating_region(PhaseIdealLoop* phase, Node* node, Node* stop) {
+  Node* region = node;
+  while (!region->isa_Region()) {
+    Node *up = phase->idom(region);
+    assert(up != region, "Must not loop");
+    assert(up != stop,   "Must not find original control");
+    region = up;
+  }
+  return region;
+}
+
+// Clone this load to each catch block
+static void call_catch_cleanup_one(PhaseIdealLoop* phase, LoadNode* load, Node* ctrl) {
+  bool trace = phase->C->directive()->ZTraceLoadBarriersOption;
+  phase->igvn().set_delay_transform(true);
+
+  // Verify preconditions
+  assert(ctrl->isa_Proj() && ctrl->in(0)->isa_Call(), "Must be a call proj");
+  assert(ctrl->raw_out(0)->isa_Catch(), "Must be a catch");
+
+  if (ctrl->raw_out(0)->isa_Catch()->outcnt() == 1) {
+    if (trace) tty->print_cr("Cleaning up catch: Skipping load %i, call with single catch", load->_idx);
+    return;
+  }
+
+  // Process the load's successor nodes - if any of them are between
+  // the call and the catch blocks, they need to be cloned too.
+  // This is done recursively.
+  int outcnt = load->outcnt();
+  uint index = 0;
+  for (int i = 0; i < outcnt; i++) {
+    if (index < load->outcnt()) {
+      Node *n = load->raw_out(index);
+      assert(!n->is_LoadBarrier(), "Sanity");
+      if (!fixup_uses_in_catch(phase, ctrl, n)) {
+        // if no successor was cloned, progress to next out.
+        index++;
+      }
+    }
+  }
+
+  // Now all the load's uses have been cloned down.
+  // The only thing left is to clone the loads themselves, and they must end up
+  // first in the catch blocks.
+
+  // We clone the loads to the catch blocks only when needed.
+  // An array is used to map the catch blocks to each lazily cloned load.
+  // That way no unnecessary loads are cloned.
+
+  // Any use dominated by the original block must have a phi and a region added.
+
+  Node* catch_node = ctrl->raw_out(0);
+  int number_of_catch_projs = catch_node->outcnt();
+  Node** proj_to_load_mapping = NEW_RESOURCE_ARRAY(Node*, number_of_catch_projs);
+  Copy::zero_to_bytes(proj_to_load_mapping, sizeof(Node*) * number_of_catch_projs);
+
+  // The phi_map is used to keep track of where phis have already been inserted
+  int phi_map_len = phase->C->unique();
+  Node** phi_map = NEW_RESOURCE_ARRAY(Node*, phi_map_len);
+  Copy::zero_to_bytes(phi_map, sizeof(Node*) * phi_map_len);
+
+  for (unsigned int i = 0; i < load->outcnt(); i++) {
+    Node* load_use_control = NULL;
+    Node* load_use = load->raw_out(i);
+
+    if (phase->has_ctrl(load_use)) {
+      load_use_control = phase->get_ctrl(load_use);
+    } else {
+      load_use_control = load_use->in(0);
+    }
+    assert(load_use_control != NULL, "sanity");
+    if (trace) tty->print_cr("  Handling use: %i, with control: %i", load_use->_idx, load_use_control->_idx);
+
+    // Sometimes the load's use is a phi. For those we need to determine from
+    // which catch block the use is defined.
+    bool load_use_is_phi = false;
+    unsigned int load_use_phi_index = 0;
+    Node* phi_ctrl = NULL;
+    if (load_use->is_Phi()) {
+      // Find phi input that matches load
+      for (unsigned int u = 1; u < load_use->req(); u++) {
+        if (load_use->in(u) == load) {
+          load_use_is_phi = true;
+          load_use_phi_index = u;
+          assert(load_use->in(0)->is_Region(), "Region or broken");
+          phi_ctrl = load_use->in(0)->in(u);
+          assert(phi_ctrl->is_CFG(), "check");
+          assert(phi_ctrl != load,   "check");
+          break;
+        }
+      }
+      assert(load_use_is_phi,        "must find");
+      assert(load_use_phi_index > 0, "sanity");
+    }
+
+    // For each load use, see which catch proj dominates, create the load clone lazily and reconnect.
+    bool found_dominating_catchproj = false;
+    for (int c = 0; c < number_of_catch_projs; c++) {
+      Node* catchproj = catch_node->raw_out(c);
+      assert(catchproj != NULL && catchproj->isa_CatchProj(), "Sanity");
+
+      if (!phase->is_dominator(catchproj, load_use_control)) {
+        if (load_use_is_phi && phase->is_dominator(catchproj, phi_ctrl)) {
+          // The load's use is local to the catchproj.
+          // Fall through and replace the load with the catch-local load clone.
+        } else {
+          continue;
+        }
+      }
+      assert(!found_dominating_catchproj, "Max one should match");
+
+      // Clone loads to catch projs
+      Node* load_clone = proj_to_load_mapping[c];
+      if (load_clone == NULL) {
+        load_clone = clone_load_to_catchproj(phase, load, catchproj);
+        proj_to_load_mapping[c] = load_clone;
+      }
+      phase->igvn().rehash_node_delayed(load_use);
+
+      if (load_use_is_phi) {
+        // phis are special - the load is defined from a specific control flow
+        load_use->set_req(load_use_phi_index, load_clone);
+      } else {
+        // Multiple edges can be replaced at once - on calls, for example.
+        load_use->replace_edge(load, load_clone);
+      }
+      --i; // More than one edge can have been removed; the remaining ones are handled in later iterations.
+
+      // We could break the for-loop after finding a dominating match.
+      // But keep iterating to catch any bad idom early.
+      found_dominating_catchproj = true;
+    }
+
+    // We found no single catchproj that dominated the use - the use is at a point
+    // where control flow from multiple catch projs has merged. We will have to create
+    // phi nodes before the use and tie the outputs from the cloned loads together. It
+    // can be a single phi or a number of chained phis, depending on the control flow.
+    if (!found_dominating_catchproj) {
+
+      // Use phi-control if use is a phi
+      if (load_use_is_phi) {
+        load_use_control = phi_ctrl;
+      }
+      assert(phase->is_dominator(ctrl, load_use_control), "Common use but no dominator");
+
+      // Clone a load on all paths
+      for (int c = 0; c < number_of_catch_projs; c++) {
+        Node* catchproj = catch_node->raw_out(c);
+        Node* load_clone = proj_to_load_mapping[c];
+        if (load_clone == NULL) {
+          load_clone = clone_load_to_catchproj(phase, load, catchproj);
+          proj_to_load_mapping[c] = load_clone;
+        }
+      }
+
+      // Move up the dominator tree from the use until the dom front is reached.
+      Node* next_region = get_dominating_region(phase, load_use_control, ctrl);
+      while (phase->idom(next_region) != catch_node) {
+        next_region = phase->idom(next_region);
+        if (trace) tty->print_cr("Moving up idom to region ctrl %i", next_region->_idx);
+      }
+      assert(phase->is_dominator(catch_node, next_region), "Sanity");
+
+      // Create or reuse a phi node that collects all cloned loads, and feed it to the use.
+      Node* test_phi = phi_map[next_region->_idx];
+      if ((test_phi != NULL) && test_phi->is_Phi()) {
+        // Reuse an already created phi
+        if (trace) tty->print_cr("    Using cached Phi %i on load_use %i", test_phi->_idx, load_use->_idx);
+        phase->igvn().rehash_node_delayed(load_use);
+        load_use->replace_edge(load, test_phi);
+        // Now this use is done
+      } else {
+        // Otherwise we need to create one or more phis
+        PhiNode* next_phi = new PhiNode(next_region, load->type());
+        phi_map[next_region->_idx] = next_phi; // cache new phi
+        phase->igvn().rehash_node_delayed(load_use);
+        load_use->replace_edge(load, next_phi);
+
+        int dominators_of_region = 0;
+        do {
+          // New phi: connect it to the region and add all cloned loads as inputs.
+          Node* region = next_region;
+          assert(region->isa_Region() && region->req() > 2, "Catch dead region nodes");
+          PhiNode* new_phi = next_phi;
+
+          if (trace) tty->print_cr("Created Phi %i on load %i with control %i", new_phi->_idx, load->_idx, region->_idx);
+
+          // Need to add all cloned loads to the phi, taking care that the right path is matched
+          dominators_of_region = 0; // reset for new region
+          for (unsigned int reg_i = 1; reg_i < region->req(); reg_i++) {
+            Node* region_pred = region->in(reg_i);
+            assert(region_pred->is_CFG(), "check");
+            bool pred_has_dominator = false;
+            for (int c = 0; c < number_of_catch_projs; c++) {
+              Node* catchproj = catch_node->raw_out(c);
+              if (phase->is_dominator(catchproj, region_pred)) {
+                new_phi->set_req(reg_i, proj_to_load_mapping[c]);
+                if (trace) tty->print_cr(" - Phi in(%i) set to load %i", reg_i, proj_to_load_mapping[c]->_idx);
+                pred_has_dominator = true;
+                dominators_of_region++;
+                break;
+              }
+            }
+
+            // Sometimes we need to chain several phis.
+            if (!pred_has_dominator) {
+              assert(dominators_of_region <= 1, "More than one region can't require extra phi");
+              if (trace) tty->print_cr(" - Region %i pred %i not dominated by catch proj", region->_idx, region_pred->_idx);
+              // Continue the search on this region_pred:
+              // - walk up to the next region
+              // - create a new phi and connect it to the first new_phi
+              next_region = get_dominating_region(phase, region_pred, ctrl);
+
+              // Look up whether there already is a phi; create a new one otherwise.
+              Node* test_phi = phi_map[next_region->_idx];
+              if ((test_phi != NULL) && test_phi->is_Phi()) {
+                next_phi = test_phi->isa_Phi();
+                dominators_of_region++; // record that a match was found and that we are done
+                if (trace) tty->print_cr("    Using cached phi Phi %i on control %i", next_phi->_idx, next_region->_idx);
+              } else {
+                next_phi = new PhiNode(next_region, load->type());
+                phi_map[next_region->_idx] = next_phi;
+              }
+              new_phi->set_req(reg_i, next_phi);
+            }
+          }
+
+          new_phi->set_req(0, region);
+          phase->igvn().register_new_node_with_optimizer(new_phi);
+          phase->set_ctrl(new_phi, region);
+
+          assert(dominators_of_region != 0, "Must have found one this iteration");
+        } while (dominators_of_region == 1);
+      }
+      --i;
+    }
+  } // end of loop over uses
+
+  assert(load->outcnt() == 0, "All uses should be handled");
+  phase->igvn().remove_dead_node(load);
+  phase->C->print_method(PHASE_CALL_CATCH_CLEANUP, 4, load->_idx);
+
+  // Now we should be home
+  phase->igvn().set_delay_transform(false);
+}
+
+// Sort out the loads that are between a call and its catch blocks.
+static void process_catch_cleanup_candidate(PhaseIdealLoop* phase, LoadNode* load) {
+  bool trace = phase->C->directive()->ZTraceLoadBarriersOption;
+
+  Node* ctrl = phase->get_ctrl(load);
+  if (!ctrl->is_Proj() || (ctrl->in(0) == NULL) || !ctrl->in(0)->isa_Call()) {
+    return;
+  }
+
+  Node* catch_node = ctrl->isa_Proj()->raw_out(0);
+  if (catch_node->is_Catch()) {
+    if (catch_node->outcnt() > 1) {
+      call_catch_cleanup_one(phase, load, ctrl);
+    } else {
+      if (trace) tty->print_cr("Call catch cleanup with only one catch: load %i ", load->_idx);
+    }
+  }
+}
+
+void ZBarrierSetC2::barrier_insertion_phase(Compile* C, PhaseIterGVN& igvn) const {
+  PhaseIdealLoop::optimize(igvn, LoopOptsZBarrierInsertion);
+  if (C->failing())  return;
+}
+
+bool ZBarrierSetC2::optimize_loops(PhaseIdealLoop* phase, LoopOptsMode mode, VectorSet& visited, Node_Stack& nstack, Node_List& worklist) const {
+
+  if (mode == LoopOptsZBarrierInsertion) {
+    // First make sure all loads between a call and its catch blocks are cloned down into the catch blocks.
+    clean_catch_blocks(phase);
+
+    // Then insert barriers on all loads that need them.
+    insert_load_barriers(phase);
+
+    // Handle all Unsafe accesses that need barriers.
+    insert_barriers_on_unsafe(phase);
+
+    phase->C->clear_major_progress();
+    return true;
+  } else {
+    return false;
+  }
+}
+
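+// A conditional atomic whose expected value is NULL can be left as a plain CAS:
+// a stale oop in memory is never NULL, so the compare gives the correct answer
+// without healing the field first.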
+static bool can_simplify_cas(LoadStoreNode* node) {
+  if (node->isa_LoadStoreConditional()) {
+    Node *expected_in = node->as_LoadStoreConditional()->in(LoadStoreConditionalNode::ExpectedIn);
+    return (expected_in->get_ptr_type() == TypePtr::NULL_PTR);
+  } else {
+    return false;
+  }
+}
+
+static void insert_barrier_before_unsafe(PhaseIdealLoop* phase, LoadStoreNode* old_node) {
+
+  Compile *C = phase->C;
+  PhaseIterGVN &igvn = phase->igvn();
+  LoadStoreNode* zclone = NULL;
+  bool is_weak = false;
+
+  Node *in_ctrl = old_node->in(MemNode::Control);
+  Node *in_mem  = old_node->in(MemNode::Memory);
+  Node *in_adr  = old_node->in(MemNode::Address);
+  Node *in_val  = old_node->in(MemNode::ValueIn);
+  const TypePtr *adr_type = old_node->adr_type();
+  const TypePtr* load_type = TypeOopPtr::BOTTOM; // The type for the load we are adding
+
+  switch (old_node->Opcode()) {
+    case Op_CompareAndExchangeP: {
+      zclone = new ZCompareAndExchangePNode(in_ctrl, in_mem, in_adr, in_val, old_node->in(LoadStoreConditionalNode::ExpectedIn),
+              adr_type, old_node->get_ptr_type(), ((CompareAndExchangeNode*)old_node)->order());
+      load_type = old_node->bottom_type()->is_ptr();
+      break;
+    }
+    case Op_WeakCompareAndSwapP: {
+      if (can_simplify_cas(old_node)) {
+        break;
+      }
+      is_weak  = true;
+      zclone = new ZWeakCompareAndSwapPNode(in_ctrl, in_mem, in_adr, in_val, old_node->in(LoadStoreConditionalNode::ExpectedIn),
+              ((CompareAndSwapNode*)old_node)->order());
+      adr_type = TypePtr::BOTTOM;
+      break;
+    }
+    case Op_CompareAndSwapP: {
+      if (can_simplify_cas(old_node)) {
+        break;
+      }
+      zclone = new ZCompareAndSwapPNode(in_ctrl, in_mem, in_adr, in_val, old_node->in(LoadStoreConditionalNode::ExpectedIn),
+              ((CompareAndSwapNode*)old_node)->order());
+      adr_type = TypePtr::BOTTOM;
+      break;
+    }
+    case Op_GetAndSetP: {
+      zclone = new ZGetAndSetPNode(in_ctrl, in_mem, in_adr, in_val, old_node->adr_type(), old_node->get_ptr_type());
+      load_type = old_node->bottom_type()->is_ptr();
+      break;
+    }
+  }
+  if (zclone != NULL) {
+    igvn.register_new_node_with_optimizer(zclone, old_node);
+
+    // Make load
+    LoadPNode *load = new LoadPNode(NULL, in_mem, in_adr, adr_type, load_type, MemNode::unordered,
+                                    LoadNode::DependsOnlyOnTest);
+    load_set_expanded_barrier(load);
+    igvn.register_new_node_with_optimizer(load);
+    igvn.replace_node(old_node, zclone);
+
+    Node *barrier = new LoadBarrierNode(C, NULL, in_mem, load, in_adr, is_weak);
+    Node *barrier_val = new ProjNode(barrier, LoadBarrierNode::Oop);
+    Node *barrier_ctrl = new ProjNode(barrier, LoadBarrierNode::Control);
+
+    igvn.register_new_node_with_optimizer(barrier);
+    igvn.register_new_node_with_optimizer(barrier_val);
+    igvn.register_new_node_with_optimizer(barrier_ctrl);
+
+    // Loop over all of in_ctrl's usages and move them to barrier_ctrl.
+    for (DUIterator_Last imin, i = in_ctrl->last_outs(imin); i >= imin; --i) {
+      Node *use = in_ctrl->last_out(i);
+      uint l;
+      for (l = 0; use->in(l) != in_ctrl; l++) {}
+      igvn.replace_input_of(use, l, barrier_ctrl);
+    }
+
+    load->set_req(MemNode::Control, in_ctrl);
+    barrier->set_req(LoadBarrierNode::Control, in_ctrl);
+    zclone->add_req(barrier_val); // Add as an extra req to keep the healed oop alive.
+
+    C->print_method(PHASE_ADD_UNSAFE_BARRIER, 4, zclone->_idx);
+  }
+}
+
+void ZBarrierSetC2::insert_barriers_on_unsafe(PhaseIdealLoop* phase) const {
+  Compile *C = phase->C;
+  PhaseIterGVN &igvn = phase->igvn();
+  uint new_ids = C->unique();
+  VectorSet visited(Thread::current()->resource_area());
+  GrowableArray<Node *> nodeStack(Thread::current()->resource_area(), 0, 0, NULL);
+  nodeStack.push(C->root());
+  visited.test_set(C->root()->_idx);
+
+  // Traverse all nodes, visit all unsafe ops that require a barrier
+  while (nodeStack.length() > 0) {
+    Node *n = nodeStack.pop();
+
+    bool is_old_node = (n->_idx < new_ids); // don't process nodes that were created during cleanup
+    if (is_old_node) {
+      if (n->is_LoadStore()) {
+        LoadStoreNode* lsn = n->as_LoadStore();
+        if (lsn->has_barrier()) {
+          BasicType bt = lsn->in(MemNode::Address)->bottom_type()->basic_type();
+          assert((bt == T_OBJECT || bt == T_ARRAY), "Sanity test");
+          insert_barrier_before_unsafe(phase, lsn);
+        }
+      }
+    }
+    for (uint i = 0; i < n->len(); i++) {
+      if (n->in(i)) {
+        if (!visited.test_set(n->in(i)->_idx)) {
+          nodeStack.push(n->in(i));
+        }
+      }
+    }
+  }
+
+  igvn.optimize();
+  C->print_method(PHASE_ADD_UNSAFE_BARRIER, 2);
+}
+
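Both insert_barriers_on_unsafe above and clean_catch_blocks and gather_loadnodes_sorted
below walk the whole graph with the same stack-plus-visited-set pattern. A minimal
standalone sketch of that traversal, using plain C++ containers and hypothetical
integer node ids in place of HotSpot's Node and VectorSet types:

    #include <vector>

    // edges[n] lists the inputs of node n (a stand-in for Node::in()).
    static void visit_all_inputs(const std::vector<std::vector<int>>& edges, int root) {
      std::vector<bool> visited(edges.size(), false);
      std::vector<int> stack;
      stack.push_back(root);
      visited[root] = true;                // mark on push, as the code above does
      while (!stack.empty()) {
        int n = stack.back();
        stack.pop_back();
        // ... process n here, e.g. check for a LoadStore that has a barrier ...
        for (int in : edges[n]) {
          if (!visited[in]) {
            visited[in] = true;
            stack.push_back(in);
          }
        }
      }
    }

    int main() {
      std::vector<std::vector<int>> edges = {{1, 2}, {}, {}};
      visit_all_inputs(edges, 0);          // visits each of 0, 2, 1 exactly once
      return 0;
    }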
+// The purpose of ZBarrierSetC2::clean_catch_blocks is to prepare the IR for
+// splicing in load barrier nodes.
+//
+// The problem is that we might have instructions between a call and its catch nodes.
+// (This is usually handled in PhaseCFG::call_catch_cleanup, which clones mach nodes in
+// already scheduled blocks.) We can't have loads that require barriers there,
+// because we need to splice in new control flow, and that would violate the IR.
+//
+// clean_catch_blocks finds all Loads that require a barrier and clones them, and any
+// dependent instructions, down to each use. The loads must end up at the beginning of
+// the catch block, before any store.
+//
+// Sometimes the load's use will be at a place dominated by all catch blocks; then we
+// need a load in each catch block, and a Phi at the dominated use.
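+//
+// A sketch of the shape being cleaned up (before and after):
+//
+//      call                                call
+//        |                                   |
+//      proj  <-- ctrl of LoadP             proj
+//        |                                   |
+//      Catch                               Catch
+//     /     \                             /     \
+//  CatchProj CatchProj             CatchProj   CatchProj
+//                                      |           |
+//                                   LoadP'      LoadP''   (catch-local clones)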
+
+void ZBarrierSetC2::clean_catch_blocks(PhaseIdealLoop* phase) const {
+
+  Compile *C = phase->C;
+  uint new_ids = C->unique();
+  PhaseIterGVN &igvn = phase->igvn();
+  VectorSet visited(Thread::current()->resource_area());
+  GrowableArray<Node *> nodeStack(Thread::current()->resource_area(), 0, 0, NULL);
+  nodeStack.push(C->root());
+  visited.test_set(C->root()->_idx);
+
+  // Traverse all nodes, visit all loads that require a barrier
+  while (nodeStack.length() > 0) {
+    Node *n = nodeStack.pop();
+
+    bool is_old_node = (n->_idx < new_ids); // don't process nodes that were created during cleanup
+    if (n->is_Load() && is_old_node) {
+      LoadNode* load = n->isa_Load();
+      // only care about loads that will have a barrier
+      if (load_require_barrier(load)) {
+        process_catch_cleanup_candidate(phase, load);
+      }
+    }
+
+    for (uint i = 0; i < n->len(); i++) {
+      if (n->in(i)) {
+        if (!visited.test_set(n->in(i)->_idx)) {
+          nodeStack.push(n->in(i));
+        }
+      }
+    }
+  }
+
+  C->print_method(PHASE_CALL_CATCH_CLEANUP, 2);
+}
+
+class DomDepthCompareClosure : public CompareClosure<LoadNode*> {
+  PhaseIdealLoop* _phase;
+
+public:
+  DomDepthCompareClosure(PhaseIdealLoop* phase) : _phase(phase) { }
+
+  int do_compare(LoadNode* const &n1, LoadNode* const &n2) {
+    int d1 = _phase->dom_depth(_phase->get_ctrl(n1));
+    int d2 = _phase->dom_depth(_phase->get_ctrl(n2));
+    if (d1 == d2) {
+      // Compare indices if the depths are the same; this ensures all entries are unique.
+      return n1->_idx - n2->_idx;
+    } else {
+      return d2 - d1;
+    }
+  }
+};
+
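do_compare orders loads by descending dominator depth, with _idx as a unique tiebreak
so insert_sorted never sees two equal entries. A standalone sketch of the same
ordering with std::sort (plain C++; the depth values are hypothetical, not HotSpot
data):

    #include <algorithm>
    #include <cassert>
    #include <vector>

    struct L { int idx; int depth; };   // stand-in for a load and its dom depth

    int main() {
      std::vector<L> loads = {{10, 2}, {11, 5}, {12, 5}, {13, 1}};
      std::sort(loads.begin(), loads.end(), [](const L& a, const L& b) {
        if (a.depth != b.depth) return a.depth > b.depth;  // deepest first, like (d2 - d1)
        return a.idx < b.idx;                              // unique tiebreak, like the _idx diff
      });
      // Deepest control first: 11, 12 (depth 5), then 10 (2), then 13 (1).
      assert(loads[0].idx == 11 && loads[1].idx == 12 && loads[2].idx == 10 && loads[3].idx == 13);
      return 0;
    }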
+// Traverse the graph and add all LoadPs to the list, sorted by dom depth.
+void gather_loadnodes_sorted(PhaseIdealLoop* phase, GrowableArray<LoadNode*>* loadList) {
+
+  VectorSet visited(Thread::current()->resource_area());
+  GrowableArray<Node *> nodeStack(Thread::current()->resource_area(), 0, 0, NULL);
+  DomDepthCompareClosure ddcc(phase);
+
+  nodeStack.push(phase->C->root());
+  while (nodeStack.length() > 0) {
+    Node *n = nodeStack.pop();
+    if (visited.test(n->_idx)) {
+      continue;
+    }
+
+    if (n->isa_Load()) {
+      LoadNode *load = n->as_Load();
+      if (load_require_barrier(load)) {
+        assert(phase->get_ctrl(load) != NULL, "sanity");
+        assert(phase->dom_depth(phase->get_ctrl(load)) != 0, "sanity");
+        loadList->insert_sorted(&ddcc, load);
+      }
+    }
+
+    visited.set(n->_idx);
+    for (uint i = 0; i < n->req(); i++) {
+      if (n->in(i)) {
+        if (!visited.test(n->in(i)->_idx)) {
+          nodeStack.push(n->in(i));
         }
       }
     }
   }
 }
 
-#endif
+// Add LoadBarriers to all LoadPs
+void ZBarrierSetC2::insert_load_barriers(PhaseIdealLoop* phase) const {
+
+  bool trace = phase->C->directive()->ZTraceLoadBarriersOption;
+  GrowableArray<LoadNode *> loadList(Thread::current()->resource_area(), 0, 0, NULL);
+  gather_loadnodes_sorted(phase, &loadList);
+
+  PhaseIterGVN &igvn = phase->igvn();
+  int count = 0;
+
+  for (GrowableArrayIterator<LoadNode *> loadIter = loadList.begin(); loadIter != loadList.end(); ++loadIter) {
+    LoadNode *load = *loadIter;
 
-bool ZBarrierSetC2::escape_add_to_con_graph(ConnectionGraph* conn_graph, PhaseGVN* gvn, Unique_Node_List* delayed_worklist, Node* n, uint opcode) const {
-  switch (opcode) {
-    case Op_LoadBarrierSlowReg:
-    case Op_LoadBarrierWeakSlowReg:
-      conn_graph->add_objload_to_connection_graph(n, delayed_worklist);
-      return true;
+    if (load_has_expanded_barrier(load)) {
+      continue;
+    }
 
-    case Op_Proj:
-      if (n->as_Proj()->_con != LoadBarrierNode::Oop || !n->in(0)->is_LoadBarrier()) {
-        return false;
+    do {
+      // Insert a barrier on a LoadP.
+      // If another load is found that needs to be expanded first, retry on that one.
+      LoadNode* result = insert_one_loadbarrier(phase, load, phase->get_ctrl(load));
+      while (result != NULL) {
+        result = insert_one_loadbarrier(phase, result, phase->get_ctrl(result));
       }
-      conn_graph->add_local_var_and_edge(n, PointsToNode::NoEscape, n->in(0)->in(LoadBarrierNode::Oop), delayed_worklist);
-      return true;
+    } while (!load_has_expanded_barrier(load));
   }
 
-  return false;
+  phase->C->print_method(PHASE_INSERT_BARRIER, 2);
+}
+
+void push_antidependent_stores(PhaseIdealLoop* phase, Node_Stack& nodestack, LoadNode* start_load) {
+  // Push all stores on the same memory state that can_alias with the load.
+  // Any load found below them must be handled first.
+  PhaseIterGVN &igvn = phase->igvn();
+  int load_alias_idx = igvn.C->get_alias_index(start_load->adr_type());
+
+  Node *mem = start_load->in(1);
+  for (DUIterator_Fast imax, u = mem->fast_outs(imax); u < imax; u++) {
+    Node *mem_use = mem->fast_out(u);
+
+    if (mem_use == start_load) continue;
+    if (!mem_use->is_Store()) continue;
+    if (!phase->has_ctrl(mem_use)) continue;
+    if (phase->get_ctrl(mem_use) != phase->get_ctrl(start_load)) continue;
+
+    // add any aliasing store in this block
+    StoreNode *store = mem_use->isa_Store();
+    const TypePtr *adr_type = store->adr_type();
+    if (igvn.C->can_alias(adr_type, load_alias_idx)) {
+      nodestack.push(store, 0);
+    }
+  }
+}
+
+LoadNode* ZBarrierSetC2::insert_one_loadbarrier(PhaseIdealLoop* phase, LoadNode* start_load, Node* ctrl) const {
+  bool trace = phase->C->directive()->ZTraceLoadBarriersOption;
+  PhaseIterGVN &igvn = phase->igvn();
+
+  // Check for other LoadPs at the same loop depth that are reachable by a DFS:
+  // - if one is found, return it; it needs to be inserted first
+  // - otherwise proceed and insert the barrier
+
+  VectorSet visited(Thread::current()->resource_area());
+  Node_Stack nodestack(100);
+
+  nodestack.push(start_load, 0);
+  push_antidependent_stores(phase, nodestack, start_load);
+
+  while (!nodestack.is_empty()) {
+    Node* n = nodestack.node(); // peek
+    nodestack.pop();
+    if (visited.test(n->_idx)) {
+      continue;
+    }
+
+    if (n->is_Load() && n != start_load && load_require_barrier(n->as_Load()) && !load_has_expanded_barrier(n->as_Load())) {
+      // Found another load that needs a barrier in the same block. Must expand later loads first.
+      if (trace) tty->print_cr(" * Found LoadP %i on DFS", n->_idx);
+      return n->as_Load(); // return node that should be expanded first
+    }
+
+    if (!phase->has_ctrl(n)) continue;
+    if (phase->get_ctrl(n) != phase->get_ctrl(start_load)) continue;
+    if (n->is_Phi()) continue;
+
+    visited.set(n->_idx);
+    // push all children
+    for (DUIterator_Fast imax, ii = n->fast_outs(imax); ii < imax; ii++) {
+      Node* c = n->fast_out(ii);
+      if (c != NULL) {
+        nodestack.push(c, 0);
+      }
+    }
+  }
+
+  insert_one_loadbarrier_inner(phase, start_load, ctrl, visited);
+  return NULL;
 }
 
-bool ZBarrierSetC2::escape_add_final_edges(ConnectionGraph* conn_graph, PhaseGVN* gvn, Node* n, uint opcode) const {
-  switch (opcode) {
-    case Op_LoadBarrierSlowReg:
-    case Op_LoadBarrierWeakSlowReg:
-      if (gvn->type(n)->make_ptr() == NULL) {
-        return false;
-      }
-      conn_graph->add_local_var_and_edge(n, PointsToNode::NoEscape, n->in(MemNode::Address), NULL);
-      return true;
+void ZBarrierSetC2::insert_one_loadbarrier_inner(PhaseIdealLoop* phase, LoadNode* load, Node* ctrl, VectorSet visited2) const {
+  PhaseIterGVN &igvn = phase->igvn();
+  Compile* C = igvn.C;
+  bool trace = C->directive()->ZTraceLoadBarriersOption;
+
+  // Create the barrier and its oop and control projections.
+  Node* barrier = new LoadBarrierNode(C, NULL, load->in(LoadNode::Memory), NULL, load->in(LoadNode::Address), load_has_weak_barrier(load));
+  Node* barrier_val = new ProjNode(barrier, LoadBarrierNode::Oop);
+  Node* barrier_ctrl = new ProjNode(barrier, LoadBarrierNode::Control);
+
+  if (trace) tty->print_cr("Insert load %i with barrier: %i and ctrl : %i", load->_idx, barrier->_idx, ctrl->_idx);
+
+  // Splice control
+  // - Insert the barrier control diamond between the load's ctrl and the ctrl successor on the path to the block end.
+  // - If the control successor is a catch, step over to the next.
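+  //
+  // A sketch of the splice:
+  //   Before:  ctrl ----------------------------------> ctrl_succ
+  //   After:   ctrl --> LoadBarrier --> barrier_ctrl --> ctrl_succ
+  //                          |
+  //                     barrier_val   (takes over the load's uses)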
+  Node* ctrl_succ = NULL;
+  for (DUIterator_Fast imax, j = ctrl->fast_outs(imax); j < imax; j++) {
+    Node* tmp = ctrl->fast_out(j);
 
-    case Op_Proj:
-      if (n->as_Proj()->_con != LoadBarrierNode::Oop || !n->in(0)->is_LoadBarrier()) {
-        return false;
-      }
-      conn_graph->add_local_var_and_edge(n, PointsToNode::NoEscape, n->in(0)->in(LoadBarrierNode::Oop), NULL);
-      return true;
+    // - CFG nodes are the ones we are going to splice (exactly one!)
+    // - Phi nodes will continue to hang from the region node!
+    // - Self loops should be skipped
+    if (tmp->is_Phi() || tmp == ctrl) {
+      continue;
+    }
+
+    if (tmp->is_CFG()) {
+      assert(ctrl_succ == NULL, "There can be only one");
+      ctrl_succ = tmp;
+      continue;
+    }
+  }
+
+  // Now splice control
+  assert(ctrl_succ != load, "sanity");
+  assert(ctrl_succ != NULL, "Broken IR");
+  bool found = false;
+  for (uint k = 0; k < ctrl_succ->req(); k++) {
+    if (ctrl_succ->in(k) == ctrl) {
+      assert(!found, "sanity");
+      if (trace) tty->print_cr(" Move CFG ctrl_succ %i to barrier_ctrl", ctrl_succ->_idx);
+      igvn.replace_input_of(ctrl_succ, k, barrier_ctrl);
+      found = true;
+      k--;
+    }
   }
 
-  return false;
+  // For all successors of ctrl: move every visited node to become a successor of barrier_ctrl instead.
+  for (DUIterator_Fast imax, r = ctrl->fast_outs(imax); r < imax; r++) {
+    Node* tmp = ctrl->fast_out(r);
+    if (visited2.test(tmp->_idx) && (tmp != load)) {
+      if (trace) tty->print_cr(" Move ctrl_succ %i to barrier_ctrl", tmp->_idx);
+      igvn.replace_input_of(tmp, 0, barrier_ctrl);
+      --r; --imax;
+    }
+  }
+
+  // Move the load's users to the barrier.
+  for (DUIterator_Fast imax, i = load->fast_outs(imax); i < imax; i++) {
+    Node* u = load->fast_out(i);
+    if (u->isa_LoadBarrier()) {
+      continue;
+    }
+
+    // Find the correct input - replace with an iterator?
+    for (uint j = 0; j < u->req(); j++) {
+      if (u->in(j) == load) {
+        igvn.replace_input_of(u, j, barrier_val);
+        --i; --imax; // Adjust the iterator of the *outer* loop
+        break; // some nodes (calls) might have several uses from the same node
+      }
+    }
+  }
+
+  // Connect barrier to load and control
+  barrier->set_req(LoadBarrierNode::Oop, load);
+  barrier->set_req(LoadBarrierNode::Control, ctrl);
+
+  igvn.rehash_node_delayed(load);
+  igvn.register_new_node_with_optimizer(barrier);
+  igvn.register_new_node_with_optimizer(barrier_val);
+  igvn.register_new_node_with_optimizer(barrier_ctrl);
+  load_set_expanded_barrier(load);
+
+  C->print_method(PHASE_INSERT_BARRIER, 3, load->_idx);
 }
+
+// The bad_mask in the ThreadLocalData shouldn't need an anti-dependence check.
+// The bad_mask address is of type TypeRawPtr, but that will alias
+// InitializeNodes until the type system is expanded.
+bool ZBarrierSetC2::needs_anti_dependence_check(const Node* node) const {
+  MachNode* mnode = node->as_Mach();
+  if (mnode != NULL) {
+    intptr_t offset = 0;
+    const TypePtr *adr_type2 = NULL;
+    const Node* base = mnode->get_base_and_disp(offset, adr_type2);
+    if ((base != NULL) &&
+        (base->is_Mach() && base->as_Mach()->ideal_Opcode() == Op_ThreadLocal) &&
+        (offset == in_bytes(ZThreadLocalData::address_bad_mask_offset()))) {
+      return false;
+    }
+  }
+  return true;
+}
--- a/src/hotspot/share/gc/z/c2/zBarrierSetC2.hpp	Mon Jun 10 05:09:52 2019 +0200
+++ b/src/hotspot/share/gc/z/c2/zBarrierSetC2.hpp	Thu Feb 14 14:54:05 2019 +0100
@@ -29,15 +29,33 @@
 #include "opto/node.hpp"
 #include "utilities/growableArray.hpp"
 
+class ZCompareAndSwapPNode : public CompareAndSwapPNode {
+public:
+  ZCompareAndSwapPNode(Node* c, Node *mem, Node *adr, Node *val, Node *ex, MemNode::MemOrd mem_ord) : CompareAndSwapPNode(c, mem, adr, val, ex, mem_ord) { }
+  virtual int Opcode() const;
+};
+
+class ZWeakCompareAndSwapPNode : public WeakCompareAndSwapPNode {
+public:
+  ZWeakCompareAndSwapPNode(Node* c, Node *mem, Node *adr, Node *val, Node *ex, MemNode::MemOrd mem_ord) : WeakCompareAndSwapPNode(c, mem, adr, val, ex, mem_ord) { }
+  virtual int Opcode() const;
+};
+
+class ZCompareAndExchangePNode : public CompareAndExchangePNode {
+public:
+  ZCompareAndExchangePNode(Node* c, Node *mem, Node *adr, Node *val, Node *ex, const TypePtr* at, const Type* t, MemNode::MemOrd mem_ord) : CompareAndExchangePNode(c, mem, adr, val, ex, at, t, mem_ord) { }
+  virtual int Opcode() const;
+};
+
+class ZGetAndSetPNode : public GetAndSetPNode {
+public:
+  ZGetAndSetPNode(Node* c, Node *mem, Node *adr, Node *val, const TypePtr* at, const Type* t) : GetAndSetPNode(c, mem, adr, val, at, t) { }
+  virtual int Opcode() const;
+};
+
 class LoadBarrierNode : public MultiNode {
 private:
   bool _weak;               // On strong or weak oop reference
-  bool _writeback;          // Controls if the barrier writes the healed oop back to memory
-                            // A swap on a memory location must never write back the healed oop
-  bool _oop_reload_allowed; // Controls if the barrier are allowed to reload the oop from memory
-                            // before healing, otherwise both the oop and the address must be
-                            // passed to the barrier from the oop
-
   static bool is_dominator(PhaseIdealLoop* phase, bool linear_only, Node *d, Node *n);
   void push_dominated_barriers(PhaseIterGVN* igvn) const;
 
@@ -57,9 +75,7 @@
                   Node* mem,
                   Node* val,
                   Node* adr,
-                  bool weak,
-                  bool writeback,
-                  bool oop_reload_allowed);
+                  bool weak);
 
   virtual int Opcode() const;
   virtual uint size_of() const;
@@ -86,17 +102,11 @@
   bool is_weak() const {
     return _weak;
   }
-
-  bool is_writeback() const {
-    return _writeback;
-  }
-
-  bool oop_reload_allowed() const {
-    return _oop_reload_allowed;
-  }
 };
 
 class LoadBarrierSlowRegNode : public LoadPNode {
+private:
+  bool _is_weak;
 public:
   LoadBarrierSlowRegNode(Node *c,
                          Node *mem,
@@ -104,8 +114,9 @@
                          const TypePtr *at,
                          const TypePtr* t,
                          MemOrd mo,
+                         bool weak = false,
                          ControlDependency control_dependency = DependsOnlyOnTest) :
-      LoadPNode(c, mem, adr, at, t, mo, control_dependency) {
+      LoadPNode(c, mem, adr, at, t, mo, control_dependency), _is_weak(weak) {
     init_class_id(Class_LoadBarrierSlowReg);
   }
 
@@ -118,30 +129,8 @@
   }
 
   virtual int Opcode() const;
-};
 
-class LoadBarrierWeakSlowRegNode : public LoadPNode {
-public:
-  LoadBarrierWeakSlowRegNode(Node *c,
-                             Node *mem,
-                             Node *adr,
-                             const TypePtr *at,
-                             const TypePtr* t,
-                             MemOrd mo,
-                             ControlDependency control_dependency = DependsOnlyOnTest) :
-      LoadPNode(c, mem, adr, at, t, mo, control_dependency) {
-    init_class_id(Class_LoadBarrierWeakSlowReg);
-  }
-
-  virtual const char * name() {
-    return "LoadBarrierWeakSlowRegNode";
-  }
-
-  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape) {
-    return NULL;
-  }
-
-  virtual int Opcode() const;
+  bool is_weak() { return _is_weak; }
 };
 
 class ZBarrierSetC2State : public ResourceObj {
@@ -157,15 +146,17 @@
   LoadBarrierNode* load_barrier_node(int idx) const;
 };
 
+enum BarrierInfo {
+  NoBarrier       = 0,
+  RequireBarrier  = 1,
+  WeakBarrier     = 3,  // Includes the RequireBarrier bit
+  ExpandedBarrier = 4
+};
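The enum is a small bit set: RequireBarrier is bit 0, WeakBarrier (3) also contains
that bit, and ExpandedBarrier is an independent bit recording that the barrier has
been inserted. A minimal standalone sketch of how such flags combine (the accessor
names are hypothetical; the real queries operate on a load's barrier data and are
not part of this hunk):

    #include <cassert>

    enum BarrierInfo {
      NoBarrier       = 0,
      RequireBarrier  = 1,
      WeakBarrier     = 3,  // includes the RequireBarrier bit
      ExpandedBarrier = 4
    };

    static bool require_barrier(int data)  { return (data & RequireBarrier) != 0; }
    static bool weak_barrier(int data)     { return (data & WeakBarrier) == WeakBarrier; }
    static bool expanded_barrier(int data) { return (data & ExpandedBarrier) != 0; }

    int main() {
      int data = WeakBarrier;         // a weak barrier also requires a barrier
      assert(require_barrier(data) && weak_barrier(data));
      data |= ExpandedBarrier;        // independent bit: the barrier is now inserted
      assert(expanded_barrier(data) && require_barrier(data));
      return 0;
    }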
+
 class ZBarrierSetC2 : public BarrierSetC2 {
 private:
   ZBarrierSetC2State* state() const;
-  Node* make_cas_loadbarrier(C2AtomicParseAccess& access) const;
-  Node* make_cmpx_loadbarrier(C2AtomicParseAccess& access) const;
-  void expand_loadbarrier_basic(PhaseMacroExpand* phase, LoadBarrierNode *barrier) const;
   void expand_loadbarrier_node(PhaseMacroExpand* phase, LoadBarrierNode* barrier) const;
-  void expand_loadbarrier_optimized(PhaseMacroExpand* phase, LoadBarrierNode *barrier) const;
-  const TypeFunc* load_barrier_Type() const;
 
 #ifdef ASSERT
   void verify_gc_barriers(bool post_parse) const;
@@ -186,41 +177,42 @@
                                         const Type* val_type) const;
 
 public:
-  Node* load_barrier(GraphKit* kit,
-                     Node* val,
-                     Node* adr,
-                     bool weak = false,
-                     bool writeback = true,
-                     bool oop_reload_allowed = true) const;
+  virtual void* create_barrier_state(Arena* comp_arena) const;
 
-  virtual void* create_barrier_state(Arena* comp_arena) const;
   virtual bool has_load_barriers() const { return true; }
   virtual bool is_gc_barrier_node(Node* node) const;
-  virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const { }
-  virtual void eliminate_useless_gc_barriers(Unique_Node_List &useful, Compile* C) const;
-  virtual void add_users_to_worklist(Unique_Node_List* worklist) const;
-  virtual void enqueue_useful_gc_barrier(PhaseIterGVN* igvn, Node* node) const;
+  virtual Node* step_over_gc_barrier(Node* c) const;
+
   virtual void register_potential_barrier_node(Node* node) const;
   virtual void unregister_potential_barrier_node(Node* node) const;
+  virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const { }
+  virtual void enqueue_useful_gc_barrier(PhaseIterGVN* igvn, Node* node) const;
+  virtual void eliminate_useless_gc_barriers(Unique_Node_List &useful, Compile* C) const;
+
   virtual bool array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type, bool is_clone, ArrayCopyPhase phase) const;
-  virtual Node* step_over_gc_barrier(Node* c) const;
-  // If the BarrierSetC2 state has kept barrier nodes in its compilation unit state to be
-  // expanded later, then now is the time to do so.
-  virtual bool expand_barriers(Compile* C, PhaseIterGVN& igvn) const;
 
-  static void find_dominating_barriers(PhaseIterGVN& igvn);
-  static void loop_optimize_gc_barrier(PhaseIdealLoop* phase, Node* node, bool last_round);
-
+  virtual bool expand_barriers(Compile* C, PhaseIterGVN& igvn) const;
   virtual bool final_graph_reshaping(Compile* compile, Node* n, uint opcode) const;
-
   virtual bool matcher_find_shared_visit(Matcher* matcher, Matcher::MStack& mstack, Node* n, uint opcode, bool& mem_op, int& mem_addr_idx) const;
+  virtual bool matcher_find_shared_post_visit(Matcher* matcher, Node* n, uint opcode) const;
+  virtual bool needs_anti_dependence_check(const Node* node) const;
 
 #ifdef ASSERT
   virtual void verify_gc_barriers(Compile* compile, CompilePhase phase) const;
 #endif
 
-  virtual bool escape_add_to_con_graph(ConnectionGraph* conn_graph, PhaseGVN* gvn, Unique_Node_List* delayed_worklist, Node* n, uint opcode) const;
-  virtual bool escape_add_final_edges(ConnectionGraph* conn_graph, PhaseGVN* gvn, Node* n, uint opcode) const;
+  // Load barrier insertion and expansion - external interface
+  virtual void barrier_insertion_phase(Compile* C, PhaseIterGVN &igvn) const;
+  virtual bool optimize_loops(PhaseIdealLoop* phase, LoopOptsMode mode, VectorSet& visited, Node_Stack& nstack, Node_List& worklist) const;
+  virtual bool is_gc_specific_loop_opts_pass(LoopOptsMode mode) const { return (mode == LoopOptsZBarrierInsertion); }
+
+private:
+  // Load barrier insertion and expansion - internals
+  void insert_barriers_on_unsafe(PhaseIdealLoop* phase) const;
+  void clean_catch_blocks(PhaseIdealLoop* phase) const;
+  void insert_load_barriers(PhaseIdealLoop* phase) const;
+  LoadNode* insert_one_loadbarrier(PhaseIdealLoop* phase, LoadNode* load, Node* ctrl) const;
+  void insert_one_loadbarrier_inner(PhaseIdealLoop* phase, LoadNode* load, Node* ctrl, VectorSet visited) const;
 };
 
 #endif // SHARE_GC_Z_C2_ZBARRIERSETC2_HPP
--- a/src/hotspot/share/gc/z/zHeap.cpp	Mon Jun 10 05:09:52 2019 +0200
+++ b/src/hotspot/share/gc/z/zHeap.cpp	Thu Feb 14 14:54:05 2019 +0100
@@ -328,46 +328,9 @@
   _mark.flush_and_free(thread);
 }
 
-class ZFixupPartialLoadsClosure : public ZRootsIteratorClosure {
-public:
-  virtual void do_oop(oop* p) {
-    ZBarrier::mark_barrier_on_root_oop_field(p);
-  }
-
-  virtual void do_oop(narrowOop* p) {
-    ShouldNotReachHere();
-  }
-};
-
-class ZFixupPartialLoadsTask : public ZTask {
-private:
-  ZThreadRootsIterator _thread_roots;
-
-public:
-  ZFixupPartialLoadsTask() :
-      ZTask("ZFixupPartialLoadsTask"),
-      _thread_roots() {}
-
-  virtual void work() {
-    ZFixupPartialLoadsClosure cl;
-    _thread_roots.oops_do(&cl);
-  }
-};
-
-void ZHeap::fixup_partial_loads() {
-  ZFixupPartialLoadsTask task;
-  _workers.run_parallel(&task);
-}
-
 bool ZHeap::mark_end() {
   assert(SafepointSynchronize::is_at_safepoint(), "Should be at safepoint");
 
-  // C2 can generate code where a safepoint poll is inserted
-  // between a load and the associated load barrier. To handle
-  // this case we need to rescan the thread stack here to make
-  // sure such oops are marked.
-  fixup_partial_loads();
-
   // Try end marking
   if (!_mark.end()) {
     // Marking not completed, continue concurrent mark
--- a/src/hotspot/share/gc/z/z_globals.hpp	Mon Jun 10 05:09:52 2019 +0200
+++ b/src/hotspot/share/gc/z/z_globals.hpp	Thu Feb 14 14:54:05 2019 +0100
@@ -82,9 +82,6 @@
   diagnostic(bool, ZVerifyForwarding, false,                                \
           "Verify forwarding tables")                                       \
                                                                             \
-  diagnostic(bool, ZOptimizeLoadBarriers, true,                             \
-          "Apply load barrier optimizations")                               \
-                                                                            \
   develop(bool, ZVerifyLoadBarriers, false,                                 \
           "Verify that reference loads are followed by barriers")
 
--- a/src/hotspot/share/opto/classes.hpp	Mon Jun 10 05:09:52 2019 +0200
+++ b/src/hotspot/share/opto/classes.hpp	Thu Feb 14 14:54:05 2019 +0100
@@ -196,7 +196,10 @@
 #endif
 zgcmacro(LoadBarrier)
 zgcmacro(LoadBarrierSlowReg)
-zgcmacro(LoadBarrierWeakSlowReg)
+zgcmacro(ZCompareAndSwapP)
+zgcmacro(ZWeakCompareAndSwapP)
+zgcmacro(ZCompareAndExchangeP)
+zgcmacro(ZGetAndSetP)
 macro(Lock)
 macro(Loop)
 macro(LoopLimit)
--- a/src/hotspot/share/opto/compile.cpp	Mon Jun 10 05:09:52 2019 +0200
+++ b/src/hotspot/share/opto/compile.cpp	Thu Feb 14 14:54:05 2019 +0100
@@ -2211,8 +2211,8 @@
 
 #endif
 
+  BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
 #ifdef ASSERT
-  BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
   bs->verify_gc_barriers(this, BarrierSetC2::BeforeOptimize);
 #endif
 
@@ -2371,7 +2371,6 @@
     igvn = ccp;
     igvn.optimize();
   }
-
   print_method(PHASE_ITER_GVN2, 2);
 
   if (failing())  return;
@@ -2382,12 +2381,6 @@
     return;
   }
 
-#if INCLUDE_ZGC
-  if (UseZGC) {
-    ZBarrierSetC2::find_dominating_barriers(igvn);
-  }
-#endif
-
   if (failing())  return;
 
   // Ensure that major progress is now clear
@@ -2407,28 +2400,33 @@
   }
 
 #ifdef ASSERT
-  BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
-  bs->verify_gc_barriers(this, BarrierSetC2::BeforeExpand);
+  bs->verify_gc_barriers(this, BarrierSetC2::BeforeLateInsertion);
+#endif
+
+  bs->barrier_insertion_phase(C, igvn);
+  if (failing())  return;
+
+#ifdef ASSERT
+  bs->verify_gc_barriers(this, BarrierSetC2::BeforeMacroExpand);
 #endif
 
   {
     TracePhase tp("macroExpand", &timers[_t_macroExpand]);
     PhaseMacroExpand  mex(igvn);
-    print_method(PHASE_BEFORE_MACRO_EXPANSION, 2);
     if (mex.expand_macro_nodes()) {
       assert(failing(), "must bail out w/ explicit message");
       return;
     }
+    print_method(PHASE_MACRO_EXPANSION, 2);
   }
 
   {
     TracePhase tp("barrierExpand", &timers[_t_barrierExpand]);
-    print_method(PHASE_BEFORE_BARRIER_EXPAND, 2);
-    BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
     if (bs->expand_barriers(this, igvn)) {
       assert(failing(), "must bail out w/ explicit message");
       return;
     }
+    print_method(PHASE_BARRIER_EXPANSION, 2);
   }
 
   if (opaque4_count() > 0) {
@@ -2824,7 +2822,7 @@
     MemBarNode* mb = n->as_MemBar();
     if (mb->trailing_store() || mb->trailing_load_store()) {
       assert(mb->leading_membar()->trailing_membar() == mb, "bad membar pair");
-      Node* mem = mb->in(MemBarNode::Precedent);
+      Node* mem = BarrierSet::barrier_set()->barrier_set_c2()->step_over_gc_barrier(mb->in(MemBarNode::Precedent));
       assert((mb->trailing_store() && mem->is_Store() && mem->as_Store()->is_release()) ||
              (mb->trailing_load_store() && mem->is_LoadStore()), "missing mem op");
     } else if (mb->leading()) {
--- a/src/hotspot/share/opto/compile.hpp	Mon Jun 10 05:09:52 2019 +0200
+++ b/src/hotspot/share/opto/compile.hpp	Thu Feb 14 14:54:05 2019 +0100
@@ -52,6 +52,7 @@
 class CallGenerator;
 class CloneMap;
 class ConnectionGraph;
+class IdealGraphPrinter;
 class InlineTree;
 class Int_Array;
 class LoadBarrierNode;
@@ -95,9 +96,9 @@
   LoopOptsNone,
   LoopOptsShenandoahExpand,
   LoopOptsShenandoahPostExpand,
+  LoopOptsZBarrierInsertion,
   LoopOptsSkipSplitIf,
-  LoopOptsVerify,
-  LoopOptsLastRound
+  LoopOptsVerify
 };
 
 typedef unsigned int node_idx_t;
@@ -658,6 +659,7 @@
   void          set_do_cleanup(bool z)          { _do_cleanup = z; }
   int               do_cleanup() const          { return _do_cleanup; }
   void          set_major_progress()            { _major_progress++; }
+  void          restore_major_progress(int progress) { _major_progress += progress; }
   void        clear_major_progress()            { _major_progress = 0; }
   int               max_inline_size() const     { return _max_inline_size; }
   void          set_freq_inline_size(int n)     { _freq_inline_size = n; }
@@ -747,7 +749,15 @@
     C->_latest_stage_start_counter.stamp();
   }
 
-  void print_method(CompilerPhaseType cpt, int level = 1) {
+  bool should_print(int level = 1) {
+#ifndef PRODUCT
+    return (_printer && _printer->should_print(level));
+#else
+    return false;
+#endif
+  }
+
+  void print_method(CompilerPhaseType cpt, int level = 1, int idx = 0) {
     EventCompilerPhase event;
     if (event.should_commit()) {
       event.set_starttime(C->_latest_stage_start_counter);
@@ -757,10 +767,15 @@
       event.commit();
     }
 
-
 #ifndef PRODUCT
-    if (_printer && _printer->should_print(level)) {
-      _printer->print_method(CompilerPhaseTypeHelper::to_string(cpt), level);
+    if (should_print(level)) {
+      char output[1024];
+      if (idx != 0) {
+        snprintf(output, sizeof(output), "%s:%d", CompilerPhaseTypeHelper::to_string(cpt), idx);
+      } else {
+        snprintf(output, sizeof(output), "%s", CompilerPhaseTypeHelper::to_string(cpt));
+      }
+      _printer->print_method(output, level);
     }
 #endif
     C->_latest_stage_start_counter.stamp();
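
The new idx argument lets repeated runs of the same phase produce distinct IGV captions such as "Insert barrier:1", "Insert barrier:2", and so on. A small self-contained sketch of the naming scheme (buffer size and the sample phase name are illustrative only):

    #include <cstddef>
    #include <cstdio>

    // Builds the caption the graph printer receives: "name" or "name:idx".
    static void phase_caption(char* out, std::size_t len, const char* name, int idx) {
      if (idx != 0) {
        std::snprintf(out, len, "%s:%d", name, idx);  // e.g. "Insert barrier:3"
      } else {
        std::snprintf(out, len, "%s", name);
      }
    }

    int main() {
      char buf[1024];
      phase_caption(buf, sizeof(buf), "Insert barrier", 3);
      std::puts(buf);  // prints "Insert barrier:3"
      return 0;
    }

Passing idx == 0 keeps the old single-caption behavior, so existing call sites need no change.
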
--- a/src/hotspot/share/opto/idealGraphPrinter.cpp	Mon Jun 10 05:09:52 2019 +0200
+++ b/src/hotspot/share/opto/idealGraphPrinter.cpp	Thu Feb 14 14:54:05 2019 +0100
@@ -350,14 +350,6 @@
   _xml->flush();
 }
 
-// Print indent
-void IdealGraphPrinter::print_indent() {
-  tty->print_cr("printing indent %d", _depth);
-  for (int i = 0; i < _depth; i++) {
-    _xml->print("%s", INDENT);
-  }
-}
-
 bool IdealGraphPrinter::traverse_outs() {
   return _traverse_outs;
 }
@@ -663,14 +655,16 @@
   }
 }
 
-void IdealGraphPrinter::print_method(const char *name, int level, bool clear_nodes) {
-  print(name, (Node *)C->root(), level, clear_nodes);
+void IdealGraphPrinter::print_method(const char *name, int level) {
+  if (should_print(level)) {
+    print(name, (Node *) C->root());
+  }
 }
 
 // Print current ideal graph
-void IdealGraphPrinter::print(const char *name, Node *node, int level, bool clear_nodes) {
+void IdealGraphPrinter::print(const char *name, Node *node) {
 
-  if (!_current_method || !_should_send_method || !should_print(level)) return;
+  if (!_current_method || !_should_send_method) return;
 
   // Warning, unsafe cast?
   _chaitin = (PhaseChaitin *)C->regalloc();
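
With the level check hoisted out of print(), the gate now lives in print_method() and print() unconditionally dumps the graph. A toy model of that split, with stand-in names:

    #include <cstdio>

    // Stand-in printer: the verbosity check sits in print_method(), so
    // print() itself no longer needs a level parameter.
    class GraphPrinter {
      int _max_level;  // configured verbosity
     public:
      explicit GraphPrinter(int max_level) : _max_level(max_level) {}
      bool should_print(int level) const { return level <= _max_level; }
      void print(const char* name) { std::printf("graph dump: %s\n", name); }
      void print_method(const char* name, int level) {
        if (should_print(level)) print(name);  // gate once, up front
      }
    };

    int main() {
      GraphPrinter p(2);
      p.print_method("Insert barrier", 2);  // printed
      p.print_method("Noisy detail", 5);    // filtered out
      return 0;
    }
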
--- a/src/hotspot/share/opto/idealGraphPrinter.hpp	Mon Jun 10 05:09:52 2019 +0200
+++ b/src/hotspot/share/opto/idealGraphPrinter.hpp	Thu Feb 14 14:54:05 2019 +0100
@@ -81,11 +81,7 @@
   static const char *METHOD_SHORT_NAME_PROPERTY;
   static const char *ASSEMBLY_ELEMENT;
 
-  elapsedTimer _walk_time;
-  elapsedTimer _output_time;
-  elapsedTimer _build_blocks_time;
-
-  static int _file_count;
+  static int _file_count;
   networkStream *_stream;
   xmlStream *_xml;
   outputStream *_output;
@@ -97,10 +93,6 @@
   bool _traverse_outs;
   Compile *C;
 
-  static void pre_node(Node* node, void *env);
-  static void post_node(Node* node, void *env);
-
-  void print_indent();
   void print_method(ciMethod *method, int bci, InlineTree *tree);
   void print_inline_tree(InlineTree *tree);
   void visit_node(Node *n, bool edges, VectorSet* temp_set);
@@ -116,7 +108,6 @@
   void tail(const char *name);
   void head(const char *name);
   void text(const char *s);
-  intptr_t get_node_id(Node *n);
   IdealGraphPrinter();
   ~IdealGraphPrinter();
 
@@ -130,9 +121,8 @@
   void print_inlining();
   void begin_method();
   void end_method();
-  void print_method(const char *name, int level=1, bool clear_nodes = false);
-  void print(const char *name, Node *root, int level=1, bool clear_nodes = false);
-  void print_xml(const char *name);
+  void print_method(const char *name, int level = 0);
+  void print(const char *name, Node *root);
   bool should_print(int level);
   void set_compile(Compile* compile) {C = compile; }
 };
--- a/src/hotspot/share/opto/lcm.cpp	Mon Jun 10 05:09:52 2019 +0200
+++ b/src/hotspot/share/opto/lcm.cpp	Thu Feb 14 14:54:05 2019 +0100
@@ -171,7 +171,6 @@
     case Op_LoadL:
     case Op_LoadP:
     case Op_LoadBarrierSlowReg:
-    case Op_LoadBarrierWeakSlowReg:
     case Op_LoadN:
     case Op_LoadS:
     case Op_LoadKlass:
--- a/src/hotspot/share/opto/loopnode.cpp	Mon Jun 10 05:09:52 2019 +0200
+++ b/src/hotspot/share/opto/loopnode.cpp	Thu Feb 14 14:54:05 2019 +0100
@@ -978,7 +978,7 @@
         wq.push(u);
         bool found_sfpt = false;
         for (uint next = 0; next < wq.size() && !found_sfpt; next++) {
-          Node *n = wq.at(next);
+          Node* n = wq.at(next);
           for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax && !found_sfpt; i++) {
             Node* u = n->fast_out(i);
             if (u == sfpt) {
@@ -992,6 +992,19 @@
         assert(found_sfpt, "no node in loop that's not input to safepoint");
       }
     }
+
+    if (UseZGC && !inner_out->in(0)->is_CountedLoopEnd()) {
+      // In some very rare cases a load has no uses other than the counted-loop
+      // safepoint. Its load barrier is then placed between the inner loop exit
+      // and the safepoint; step over the barrier's If diamond in that case.
+
+      Node* ifnode = inner_out->in(1)->in(0);
+      // Region->IfTrue->If == Region->IfFalse->If
+      if (ifnode == inner_out->in(2)->in(0)) {
+        inner_out = ifnode->in(0);
+      }
+    }
+
     CountedLoopEndNode* cle = inner_out->in(0)->as_CountedLoopEnd();
     assert(cle == inner->loopexit_or_null(), "mismatch");
     bool has_skeleton = outer_le->in(1)->bottom_type()->singleton() && outer_le->in(1)->bottom_type()->is_int()->get_con() == 0;
@@ -2761,7 +2774,7 @@
 // Create a PhaseLoop.  Build the ideal Loop tree.  Map each Ideal Node to
 // its corresponding LoopNode.  If 'optimize' is true, do some loop cleanups.
 void PhaseIdealLoop::build_and_optimize(LoopOptsMode mode) {
-  bool do_split_ifs = (mode == LoopOptsDefault || mode == LoopOptsLastRound);
+  bool do_split_ifs = (mode == LoopOptsDefault);
   bool skip_loop_opts = (mode == LoopOptsNone);
 
   int old_progress = C->major_progress();
@@ -2921,9 +2934,7 @@
   build_loop_late( visited, worklist, nstack );
 
   if (_verify_only) {
-    // restore major progress flag
-    for (int i = 0; i < old_progress; i++)
-      C->set_major_progress();
+    C->restore_major_progress(old_progress);
     assert(C->unique() == unique, "verification mode made Nodes? ? ?");
     assert(_igvn._worklist.size() == orig_worklist_size, "shouldn't push anything");
     return;
@@ -2967,9 +2978,7 @@
 
   if (skip_loop_opts) {
     // restore major progress flag
-    for (int i = 0; i < old_progress; i++) {
-      C->set_major_progress();
-    }
+    C->restore_major_progress(old_progress);
 
     // Cleanup any modified bits
     _igvn.optimize();
@@ -3018,11 +3027,8 @@
   // that require basic-block info (like cloning through Phi's)
   if( SplitIfBlocks && do_split_ifs ) {
     visited.Clear();
-    split_if_with_blocks( visited, nstack, mode == LoopOptsLastRound );
+    split_if_with_blocks( visited, nstack);
     NOT_PRODUCT( if( VerifyLoopOptimizations ) verify(); );
-    if (mode == LoopOptsLastRound) {
-      C->set_major_progress();
-    }
   }
 
   if (!C->major_progress() && do_expensive_nodes && process_expensive_nodes()) {
@@ -3157,8 +3163,7 @@
   _ltree_root->verify_tree(loop_verify._ltree_root, NULL);
   // Reset major-progress.  It was cleared by creating a verify version of
   // PhaseIdealLoop.
-  for( int i=0; i<old_progress; i++ )
-    C->set_major_progress();
+  C->restore_major_progress(old_progress);
 }
 
 //------------------------------verify_compare---------------------------------
@@ -4288,7 +4293,6 @@
     case Op_LoadS:
     case Op_LoadP:
     case Op_LoadBarrierSlowReg:
-    case Op_LoadBarrierWeakSlowReg:
     case Op_LoadN:
     case Op_LoadRange:
     case Op_LoadD_unaligned:
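
The new UseZGC case steps over a barrier's If diamond when it sits between the inner-loop exit and the counted-loop safepoint: if both Region inputs are projections of the same If, the real loop exit is that If's control input. The shape test can be modelled on a toy graph (Node below is a stand-in struct, not the C2 class):

    #include <cassert>
    #include <vector>

    struct Node {
      std::vector<Node*> in;             // in[0] is control, as in C2 nodes
      Node* ctrl() const { return in[0]; }
    };

    // Returns the node to treat as the loop exit: if the region's two paths
    // (in[1], in[2]) are projections of the same If, skip over the diamond
    // to that If's own control input; otherwise keep the region.
    static Node* skip_barrier_diamond(Node* region) {
      Node* if_node = region->in[1]->ctrl();
      if (if_node == region->in[2]->ctrl()) {  // Region->IfTrue->If == Region->IfFalse->If
        return if_node->ctrl();
      }
      return region;
    }

    int main() {
      Node loop_exit{{nullptr}};
      Node iff{{&loop_exit}};
      Node if_true{{&iff}}, if_false{{&iff}};
      Node region{{nullptr, &if_true, &if_false}};
      assert(skip_barrier_diamond(&region) == &loop_exit);
      return 0;
    }
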
--- a/src/hotspot/share/opto/loopnode.hpp	Mon Jun 10 05:09:52 2019 +0200
+++ b/src/hotspot/share/opto/loopnode.hpp	Thu Feb 14 14:54:05 2019 +0100
@@ -824,6 +824,7 @@
   // pull such a subsumed block out of the array, we write back the final
   // correct block.
   Node *get_ctrl( Node *i ) {
+
     assert(has_node(i), "");
     Node *n = get_ctrl_no_update(i);
     _nodes.map( i->_idx, (Node*)((intptr_t)n + 1) );
@@ -1306,9 +1307,9 @@
 
   // Check for aggressive application of 'split-if' optimization,
   // using basic block level info.
-  void  split_if_with_blocks     ( VectorSet &visited, Node_Stack &nstack, bool last_round );
+  void  split_if_with_blocks     ( VectorSet &visited, Node_Stack &nstack);
   Node *split_if_with_blocks_pre ( Node *n );
-  void  split_if_with_blocks_post( Node *n, bool last_round );
+  void  split_if_with_blocks_post( Node *n );
   Node *has_local_phi_input( Node *n );
   // Mark an IfNode as being dominated by a prior test,
   // without actually altering the CFG (and hence IDOM info).
--- a/src/hotspot/share/opto/loopopts.cpp	Mon Jun 10 05:09:52 2019 +0200
+++ b/src/hotspot/share/opto/loopopts.cpp	Thu Feb 14 14:54:05 2019 +0100
@@ -1195,11 +1195,11 @@
 // Do the real work in a non-recursive function.  CFG hackery wants to be
 // in the post-order, so it can dirty the I-DOM info and not use the dirtied
 // info.
-void PhaseIdealLoop::split_if_with_blocks_post(Node *n, bool last_round) {
+void PhaseIdealLoop::split_if_with_blocks_post(Node *n) {
 
   // Cloning Cmp through Phi's involves the split-if transform.
   // FastLock is not used by an If
-  if (n->is_Cmp() && !n->is_FastLock() && !last_round) {
+  if (n->is_Cmp() && !n->is_FastLock()) {
     Node *n_ctrl = get_ctrl(n);
     // Determine if the Node has inputs from some local Phi.
     // Returns the block to clone thru.
@@ -1451,18 +1451,12 @@
       get_loop(get_ctrl(n)) == get_loop(get_ctrl(n->in(1))) ) {
     _igvn.replace_node( n, n->in(1) );
   }
-
-#if INCLUDE_ZGC
-  if (UseZGC) {
-    ZBarrierSetC2::loop_optimize_gc_barrier(this, n, last_round);
-  }
-#endif
 }
 
 //------------------------------split_if_with_blocks---------------------------
 // Check for aggressive application of 'split-if' optimization,
 // using basic block level info.
-void PhaseIdealLoop::split_if_with_blocks(VectorSet &visited, Node_Stack &nstack, bool last_round) {
+void PhaseIdealLoop::split_if_with_blocks(VectorSet &visited, Node_Stack &nstack) {
   Node* root = C->root();
   visited.set(root->_idx); // first, mark root as visited
   // Do pre-visit work for root
@@ -1488,7 +1482,7 @@
       // All of n's children have been processed, complete post-processing.
       if (cnt != 0 && !n->is_Con()) {
         assert(has_node(n), "no dead nodes");
-        split_if_with_blocks_post(n, last_round);
+        split_if_with_blocks_post(n);
       }
       if (must_throttle_split_if()) {
         nstack.clear();
--- a/src/hotspot/share/opto/memnode.cpp	Mon Jun 10 05:09:52 2019 +0200
+++ b/src/hotspot/share/opto/memnode.cpp	Thu Feb 14 14:54:05 2019 +0100
@@ -908,14 +908,6 @@
 // a load node that reads from the source array so we may be able to
 // optimize out the ArrayCopy node later.
 Node* LoadNode::can_see_arraycopy_value(Node* st, PhaseGVN* phase) const {
-#if INCLUDE_ZGC
-  if (UseZGC) {
-    if (bottom_type()->make_oopptr() != NULL) {
-      return NULL;
-    }
-  }
-#endif
-
   Node* ld_adr = in(MemNode::Address);
   intptr_t ld_off = 0;
   AllocateNode* ld_alloc = AllocateNode::Ideal_allocation(ld_adr, phase, ld_off);
@@ -2811,7 +2803,8 @@
 LoadStoreNode::LoadStoreNode( Node *c, Node *mem, Node *adr, Node *val, const TypePtr* at, const Type* rt, uint required )
   : Node(required),
     _type(rt),
-    _adr_type(at)
+    _adr_type(at),
+    _has_barrier(false)
 {
   init_req(MemNode::Control, c  );
   init_req(MemNode::Memory , mem);
@@ -3105,16 +3098,6 @@
     return NULL;
   }
 
-#if INCLUDE_ZGC
-  if (UseZGC) {
-    if (req() == (Precedent+1) && in(MemBarNode::Precedent)->in(0) != NULL && in(MemBarNode::Precedent)->in(0)->is_LoadBarrier()) {
-      Node* load_node = in(MemBarNode::Precedent)->in(0)->in(LoadBarrierNode::Oop);
-      set_req(MemBarNode::Precedent, load_node);
-      return this;
-    }
-  }
-#endif
-
   bool progress = false;
   // Eliminate volatile MemBars for scalar replaced objects.
   if (can_reshape && req() == (Precedent+1)) {
--- a/src/hotspot/share/opto/memnode.hpp	Mon Jun 10 05:09:52 2019 +0200
+++ b/src/hotspot/share/opto/memnode.hpp	Thu Feb 14 14:54:05 2019 +0100
@@ -164,6 +164,7 @@
     Pinned,
     DependsOnlyOnTest
   };
+
 private:
   // LoadNode::hash() doesn't take the _control_dependency field
   // into account: If the graph already has a non-pinned LoadNode and
@@ -182,6 +183,8 @@
   // this field.
   const MemOrd _mo;
 
+  uint _barrier; // Bit field with barrier information
+
 protected:
   virtual bool cmp(const Node &n) const;
   virtual uint size_of() const; // Size is bigger
@@ -193,7 +196,7 @@
 public:
 
   LoadNode(Node *c, Node *mem, Node *adr, const TypePtr* at, const Type *rt, MemOrd mo, ControlDependency control_dependency)
-    : MemNode(c,mem,adr,at), _control_dependency(control_dependency), _mo(mo), _type(rt) {
+    : MemNode(c,mem,adr,at), _control_dependency(control_dependency), _mo(mo), _barrier(0), _type(rt) {
     init_class_id(Class_Load);
   }
   inline bool is_unordered() const { return !is_acquire(); }
@@ -262,6 +265,10 @@
   Node* convert_to_unsigned_load(PhaseGVN& gvn);
   Node* convert_to_signed_load(PhaseGVN& gvn);
 
+  void copy_barrier_info(const Node* src) { _barrier = src->as_Load()->_barrier; }
+  uint barrier_data() { return _barrier; }
+  void set_barrier_data(uint barrier_data) { _barrier |= barrier_data; }
+
 #ifndef PRODUCT
   virtual void dump_spec(outputStream *st) const;
 #endif
@@ -810,6 +817,7 @@
 private:
   const Type* const _type;      // What kind of value is loaded?
   const TypePtr* _adr_type;     // What kind of memory is being addressed?
+  bool _has_barrier;
   virtual uint size_of() const; // Size is bigger
 public:
   LoadStoreNode( Node *c, Node *mem, Node *adr, Node *val, const TypePtr* at, const Type* rt, uint required );
@@ -822,6 +830,8 @@
 
   bool result_not_used() const;
   MemBarNode* trailing_membar() const;
+  void set_has_barrier() { _has_barrier = true; }
+  bool has_barrier() const { return _has_barrier; }
 };
 
 class LoadStoreConditionalNode : public LoadStoreNode {
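
The _barrier word on LoadNode is a plain bit set: set_barrier_data() ORs new flags in, barrier_data() reads them back, and copy_barrier_info() carries them across clones (see the Node::clone() hook in node.cpp below). A minimal stand-alone model of that bookkeeping; the flag values here are made up, the real encodings belong to the ZGC barrier set:

    #include <cassert>

    typedef unsigned int uint;

    // Hypothetical flag bits for illustration only.
    const uint BarrierStrong = 1;
    const uint BarrierWeak   = 2;

    class Load {
      uint _barrier;  // bit field with barrier information
     public:
      Load() : _barrier(0) {}
      uint barrier_data() const        { return _barrier; }
      void set_barrier_data(uint data) { _barrier |= data; }  // accumulate, don't overwrite
      void copy_barrier_info(const Load* src) { _barrier = src->_barrier; }
    };

    int main() {
      Load a;
      a.set_barrier_data(BarrierWeak);
      Load b;                 // imagine b = a.clone()
      b.copy_barrier_info(&a);
      assert(b.barrier_data() == BarrierWeak);
      return 0;
    }
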
--- a/src/hotspot/share/opto/node.cpp	Mon Jun 10 05:09:52 2019 +0200
+++ b/src/hotspot/share/opto/node.cpp	Thu Feb 14 14:54:05 2019 +0100
@@ -546,6 +546,9 @@
   if (n->is_SafePoint()) {
     n->as_SafePoint()->clone_replaced_nodes();
   }
+  if (n->is_Load()) {
+    n->as_Load()->copy_barrier_info(this);
+  }
   return n;                     // Return the clone
 }
 
@@ -564,7 +567,6 @@
   }
 }
 
-
 //------------------------------~Node------------------------------------------
 // Fancy destructor; eagerly attempt to reclaim Node numberings and storage
 void Node::destruct() {
@@ -1454,13 +1456,16 @@
 //------------------------------needs_anti_dependence_check---------------------
 // Nodes which use memory without consuming it, hence need antidependences.
 bool Node::needs_anti_dependence_check() const {
-  if( req() < 2 || (_flags & Flag_needs_anti_dependence_check) == 0 )
+  if (req() < 2 || (_flags & Flag_needs_anti_dependence_check) == 0) {
     return false;
-  else
-    return in(1)->bottom_type()->has_memory();
+  }
+  BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
+  if (!bs->needs_anti_dependence_check(this)) {
+    return false;
+  }
+  return in(1)->bottom_type()->has_memory();
 }
 
-
 // Get an integer constant from a ConNode (or CastIINode).
 // Return a default value if there is no apparent constant here.
 const TypeInt* Node::find_int_type() const {
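
needs_anti_dependence_check() now gives the active barrier set a veto, so a collector can exempt loads whose ordering it already enforces itself from the scheduler's anti-dependence analysis. A strategy-pattern sketch of that delegation, using stand-in types rather than the real BarrierSetC2 interface:

    #include <cassert>

    struct Node {
      bool has_memory_input;
      bool gc_pinned;  // stand-in: true if the barrier set orders this load itself
    };

    // Stand-in for BarrierSetC2: the default says "yes, check"; a GC-specific
    // subclass can decline for loads it already keeps ordered.
    struct BarrierSetC2 {
      virtual ~BarrierSetC2() {}
      virtual bool needs_anti_dependence_check(const Node*) const { return true; }
    };

    struct ZLikeBarrierSetC2 : BarrierSetC2 {
      virtual bool needs_anti_dependence_check(const Node* n) const { return !n->gc_pinned; }
    };

    static bool needs_check(const BarrierSetC2* bs, const Node* n) {
      if (!bs->needs_anti_dependence_check(n)) return false;  // barrier-set veto
      return n->has_memory_input;                             // original criterion
    }

    int main() {
      ZLikeBarrierSetC2 z;
      Node plain  = { true, false };
      Node pinned = { true, true  };
      assert(needs_check(&z, &plain));
      assert(!needs_check(&z, &pinned));
      return 0;
    }
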
--- a/src/hotspot/share/opto/node.hpp	Mon Jun 10 05:09:52 2019 +0200
+++ b/src/hotspot/share/opto/node.hpp	Thu Feb 14 14:54:05 2019 +0100
@@ -83,8 +83,8 @@
 class LoadNode;
 class LoadBarrierNode;
 class LoadBarrierSlowRegNode;
-class LoadBarrierWeakSlowRegNode;
 class LoadStoreNode;
+class LoadStoreConditionalNode;
 class LockNode;
 class LoopNode;
 class MachBranchNode;
@@ -688,8 +688,7 @@
     DEFINE_CLASS_ID(Mem,   Node, 4)
       DEFINE_CLASS_ID(Load,  Mem, 0)
         DEFINE_CLASS_ID(LoadVector,  Load, 0)
-          DEFINE_CLASS_ID(LoadBarrierSlowReg, Load, 1)
-          DEFINE_CLASS_ID(LoadBarrierWeakSlowReg, Load, 2)
+        DEFINE_CLASS_ID(LoadBarrierSlowReg, Load, 1)
       DEFINE_CLASS_ID(Store, Mem, 1)
         DEFINE_CLASS_ID(StoreVector, Store, 0)
       DEFINE_CLASS_ID(LoadStore, Mem, 2)
@@ -830,9 +829,9 @@
   DEFINE_CLASS_QUERY(JumpProj)
   DEFINE_CLASS_QUERY(Load)
   DEFINE_CLASS_QUERY(LoadStore)
+  DEFINE_CLASS_QUERY(LoadStoreConditional)
   DEFINE_CLASS_QUERY(LoadBarrier)
   DEFINE_CLASS_QUERY(LoadBarrierSlowReg)
-  DEFINE_CLASS_QUERY(LoadBarrierWeakSlowReg)
   DEFINE_CLASS_QUERY(Lock)
   DEFINE_CLASS_QUERY(Loop)
   DEFINE_CLASS_QUERY(Mach)
--- a/src/hotspot/share/opto/phaseX.cpp	Mon Jun 10 05:09:52 2019 +0200
+++ b/src/hotspot/share/opto/phaseX.cpp	Thu Feb 14 14:54:05 2019 +0100
@@ -1003,9 +1003,6 @@
         n->is_Mem() )
       add_users_to_worklist(n);
   }
-
-  BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
-  bs->add_users_to_worklist(&_worklist);
 }
 
 /**
--- a/src/hotspot/share/opto/phasetype.hpp	Mon Jun 10 05:09:52 2019 +0200
+++ b/src/hotspot/share/opto/phasetype.hpp	Thu Feb 14 14:54:05 2019 +0100
@@ -52,8 +52,11 @@
   PHASE_MATCHING,
   PHASE_INCREMENTAL_INLINE,
   PHASE_INCREMENTAL_BOXING_INLINE,
-  PHASE_BEFORE_BARRIER_EXPAND,
-  PHASE_BEFORE_MACRO_EXPANSION,
+  PHASE_CALL_CATCH_CLEANUP,
+  PHASE_INSERT_BARRIER,
+  PHASE_MACRO_EXPANSION,
+  PHASE_BARRIER_EXPANSION,
+  PHASE_ADD_UNSAFE_BARRIER,
   PHASE_END,
   PHASE_FAILURE,
 
@@ -90,8 +93,11 @@
       case PHASE_MATCHING:                   return "After matching";
       case PHASE_INCREMENTAL_INLINE:         return "Incremental Inline";
       case PHASE_INCREMENTAL_BOXING_INLINE:  return "Incremental Boxing Inline";
-      case PHASE_BEFORE_BARRIER_EXPAND:      return "Before Barrier Expand";
-      case PHASE_BEFORE_MACRO_EXPANSION:     return "Before macro expansion";
+      case PHASE_CALL_CATCH_CLEANUP:         return "Call catch cleanup";
+      case PHASE_INSERT_BARRIER:             return "Insert barrier";
+      case PHASE_MACRO_EXPANSION:            return "Macro expand";
+      case PHASE_BARRIER_EXPANSION:          return "Barrier expand";
+      case PHASE_ADD_UNSAFE_BARRIER:         return "Add barrier to unsafe op";
       case PHASE_END:                        return "End";
       case PHASE_FAILURE:                    return "Failure";
       default:
--- a/src/hotspot/share/opto/vectornode.cpp	Mon Jun 10 05:09:52 2019 +0200
+++ b/src/hotspot/share/opto/vectornode.cpp	Thu Feb 14 14:54:05 2019 +0100
@@ -298,7 +298,6 @@
   case Op_LoadF:   case Op_LoadD:
   case Op_LoadP:   case Op_LoadN:
   case Op_LoadBarrierSlowReg:
-  case Op_LoadBarrierWeakSlowReg:
     *start = 0;
     *end   = 0; // no vector operands
     break;
--- a/src/hotspot/share/runtime/stackValue.cpp	Mon Jun 10 05:09:52 2019 +0200
+++ b/src/hotspot/share/runtime/stackValue.cpp	Thu Feb 14 14:54:05 2019 +0100
@@ -133,16 +133,12 @@
       }
 #endif
       // Deoptimization must make sure all oops have passed load barriers
-#if INCLUDE_ZGC
-      if (UseZGC) {
-        val = ZBarrier::load_barrier_on_oop_field_preloaded((oop*)value_addr, val);
-      }
-#endif
 #if INCLUDE_SHENANDOAHGC
       if (UseShenandoahGC) {
         val = ShenandoahBarrierSet::barrier_set()->load_reference_barrier(val);
       }
 #endif
+      assert(oopDesc::is_oop_or_null(val, false), "bad oop found");
       Handle h(Thread::current(), val); // Wrap a handle around the oop
       return new StackValue(h);
     }
--- a/src/hotspot/share/utilities/growableArray.hpp	Mon Jun 10 05:09:52 2019 +0200
+++ b/src/hotspot/share/utilities/growableArray.hpp	Thu Feb 14 14:54:05 2019 +0100
@@ -152,6 +152,12 @@
 template<class E> class GrowableArrayIterator;
 template<class E, class UnaryPredicate> class GrowableArrayFilterIterator;
 
+template<class E>
+class CompareClosure : public Closure {
+public:
+  virtual int do_compare(const E&, const E&) = 0;
+};
+
 template<class E> class GrowableArray : public GenericGrowableArray {
   friend class VMStructs;
 
@@ -443,6 +449,37 @@
     }
     return min;
   }
+
+  E insert_sorted(CompareClosure<E>* cc, const E& key) {
+    bool found;
+    int location = find_sorted(cc, key, found);
+    if (!found) {
+      insert_before(location, key);
+    }
+    return at(location);
+  }
+
+  template<typename K>
+  int find_sorted(CompareClosure<E>* cc, const K& key, bool& found) {
+    found = false;
+    int min = 0;
+    int max = length() - 1;
+
+    while (max >= min) {
+      int mid = (int)(((uint)max + min) / 2);
+      E value = at(mid);
+      int diff = cc->do_compare(key, value);
+      if (diff > 0) {
+        min = mid + 1;
+      } else if (diff < 0) {
+        max = mid - 1;
+      } else {
+        found = true;
+        return mid;
+      }
+    }
+    return min;
+  }
 };
 
 // Global GrowableArray methods (one instance in the library per each 'E' type).
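
insert_sorted() keeps a GrowableArray ordered under a caller-supplied CompareClosure: find_sorted() binary-searches for the key, using the (uint)max + min midpoint to avoid signed overflow, and the element is inserted only when absent, so the array doubles as a sorted set. The same algorithm can be exercised stand-alone over std::vector; the closure and test values below are illustrative:

    #include <cassert>
    #include <vector>

    template <class E>
    class CompareClosure {
     public:
      virtual ~CompareClosure() {}
      virtual int do_compare(const E& a, const E& b) = 0;
    };

    struct IntCompare : CompareClosure<int> {
      virtual int do_compare(const int& a, const int& b) { return a - b; }
    };

    // Binary search mirroring find_sorted(): returns the match index, or the
    // insertion point with found == false.
    template <class E>
    static int find_sorted(CompareClosure<E>* cc, const std::vector<E>& v,
                           const E& key, bool& found) {
      found = false;
      int min = 0, max = (int)v.size() - 1;
      while (max >= min) {
        int mid = (int)(((unsigned)max + min) / 2);  // avoids (min + max) overflow
        int diff = cc->do_compare(key, v[(size_t)mid]);
        if (diff > 0)      min = mid + 1;
        else if (diff < 0) max = mid - 1;
        else { found = true; return mid; }
      }
      return min;
    }

    template <class E>
    static E insert_sorted(CompareClosure<E>* cc, std::vector<E>& v, const E& key) {
      bool found;
      int loc = find_sorted(cc, v, key, found);
      if (!found) v.insert(v.begin() + loc, key);  // keep order, skip duplicates
      return v[(size_t)loc];
    }

    int main() {
      IntCompare cmp;
      std::vector<int> v;
      insert_sorted<int>(&cmp, v, 5);
      insert_sorted<int>(&cmp, v, 1);
      insert_sorted<int>(&cmp, v, 5);  // duplicate: not inserted
      assert(v.size() == 2 && v[0] == 1 && v[1] == 5);
      return 0;
    }
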