# HG changeset patch # User neliasso # Date 1550152445 -3600 # Node ID ed12027517c0ca5d2ad21e214666384969893b93 # Parent ea43db53de91f5dddd34ef866d54c083e6a0771a 8224675: Late GC barrier insertion for ZGC Reviewed-by: roland, eosterlund, pliden diff -r ea43db53de91 -r ed12027517c0 src/hotspot/cpu/x86/gc/z/z_x86_64.ad --- a/src/hotspot/cpu/x86/gc/z/z_x86_64.ad Mon Jun 10 05:09:52 2019 +0200 +++ b/src/hotspot/cpu/x86/gc/z/z_x86_64.ad Thu Feb 14 14:54:05 2019 +0100 @@ -21,6 +21,12 @@ // questions. // +source_hpp %{ + +#include "gc/z/c2/zBarrierSetC2.hpp" + +%} + source %{ #include "gc/z/zBarrierSetAssembler.hpp" @@ -45,7 +51,7 @@ rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15) %{ match(Set dst (LoadBarrierSlowReg src)); - predicate(UseAVX <= 2); + predicate((UseAVX <= 2) && !n->as_LoadBarrierSlowReg()->is_weak()); effect(DEF dst, KILL cr, KILL x0, KILL x1, KILL x2, KILL x3, @@ -74,7 +80,7 @@ rxmm28 x28, rxmm29 x29, rxmm30 x30, rxmm31 x31) %{ match(Set dst (LoadBarrierSlowReg src)); - predicate(UseAVX == 3); + predicate((UseAVX == 3) && !n->as_LoadBarrierSlowReg()->is_weak()); effect(DEF dst, KILL cr, KILL x0, KILL x1, KILL x2, KILL x3, @@ -102,8 +108,8 @@ rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11, rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15) %{ - match(Set dst (LoadBarrierWeakSlowReg src)); - predicate(UseAVX <= 2); + match(Set dst (LoadBarrierSlowReg src)); + predicate((UseAVX <= 2) && n->as_LoadBarrierSlowReg()->is_weak()); effect(DEF dst, KILL cr, KILL x0, KILL x1, KILL x2, KILL x3, @@ -131,8 +137,8 @@ rxmm24 x24, rxmm25 x25, rxmm26 x26, rxmm27 x27, rxmm28 x28, rxmm29 x29, rxmm30 x30, rxmm31 x31) %{ - match(Set dst (LoadBarrierWeakSlowReg src)); - predicate(UseAVX == 3); + match(Set dst (LoadBarrierSlowReg src)); + predicate((UseAVX == 3) && n->as_LoadBarrierSlowReg()->is_weak()); effect(DEF dst, KILL cr, KILL x0, KILL x1, KILL x2, KILL x3, @@ -152,3 +158,58 @@ ins_pipe(pipe_slow); %} + +// Specialized versions of compareAndExchangeP that adds a keepalive that is consumed +// but doesn't affect output. + +instruct z_compareAndExchangeP( + memory mem_ptr, + rax_RegP oldval, rRegP newval, rRegP keepalive, + rFlagsReg cr) %{ + predicate(VM_Version::supports_cx8()); + match(Set oldval (ZCompareAndExchangeP (Binary mem_ptr keepalive) (Binary oldval newval))); + effect(KILL cr); + + format %{ "cmpxchgq $mem_ptr,$newval\t# " + "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %} + opcode(0x0F, 0xB1); + ins_encode(lock_prefix, + REX_reg_mem_wide(newval, mem_ptr), + OpcP, OpcS, + reg_mem(newval, mem_ptr) // lock cmpxchg + ); + ins_pipe( pipe_cmpxchg ); +%} + +instruct z_compareAndSwapP(rRegI res, + memory mem_ptr, + rax_RegP oldval, rRegP newval, rRegP keepalive, + rFlagsReg cr) %{ + predicate(VM_Version::supports_cx8()); + match(Set res (ZCompareAndSwapP (Binary mem_ptr keepalive) (Binary oldval newval))); + match(Set res (ZWeakCompareAndSwapP (Binary mem_ptr keepalive) (Binary oldval newval))); + effect(KILL cr, KILL oldval); + + format %{ "cmpxchgq $mem_ptr,$newval\t# " + "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" + "sete $res\n\t" + "movzbl $res, $res" %} + opcode(0x0F, 0xB1); + ins_encode(lock_prefix, + REX_reg_mem_wide(newval, mem_ptr), + OpcP, OpcS, + reg_mem(newval, mem_ptr), + REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete + REX_reg_breg(res, res), // movzbl + Opcode(0xF), Opcode(0xB6), reg_reg(res, res)); + ins_pipe( pipe_cmpxchg ); +%} + +instruct z_xchgP( memory mem, rRegP newval, rRegP keepalive) %{ + match(Set newval (ZGetAndSetP mem (Binary newval keepalive))); + format %{ "XCHGQ $newval,[$mem]" %} + ins_encode %{ + __ xchgq($newval$$Register, $mem$$Address); + %} + ins_pipe( pipe_cmpxchg ); +%} diff -r ea43db53de91 -r ed12027517c0 src/hotspot/share/adlc/formssel.cpp --- a/src/hotspot/share/adlc/formssel.cpp Mon Jun 10 05:09:52 2019 +0200 +++ b/src/hotspot/share/adlc/formssel.cpp Thu Feb 14 14:54:05 2019 +0100 @@ -757,7 +757,6 @@ return NO_MEMORY_OPERAND; } - // This instruction captures the machine-independent bottom_type // Expected use is for pointer vs oop determination for LoadP bool InstructForm::captures_bottom_type(FormDict &globals) const { @@ -775,8 +774,9 @@ !strcmp(_matrule->_rChild->_opType,"GetAndSetP") || !strcmp(_matrule->_rChild->_opType,"GetAndSetN") || #if INCLUDE_ZGC + !strcmp(_matrule->_rChild->_opType,"ZGetAndSetP") || + !strcmp(_matrule->_rChild->_opType,"ZCompareAndExchangeP") || !strcmp(_matrule->_rChild->_opType,"LoadBarrierSlowReg") || - !strcmp(_matrule->_rChild->_opType,"LoadBarrierWeakSlowReg") || #endif #if INCLUDE_SHENANDOAHGC !strcmp(_matrule->_rChild->_opType,"ShenandoahCompareAndExchangeP") || @@ -3506,12 +3506,16 @@ "CompareAndSwapB", "CompareAndSwapS", "CompareAndSwapI", "CompareAndSwapL", "CompareAndSwapP", "CompareAndSwapN", "WeakCompareAndSwapB", "WeakCompareAndSwapS", "WeakCompareAndSwapI", "WeakCompareAndSwapL", "WeakCompareAndSwapP", "WeakCompareAndSwapN", "CompareAndExchangeB", "CompareAndExchangeS", "CompareAndExchangeI", "CompareAndExchangeL", "CompareAndExchangeP", "CompareAndExchangeN", +#if INCLUDE_SHENANDOAHGC "ShenandoahCompareAndSwapN", "ShenandoahCompareAndSwapP", "ShenandoahWeakCompareAndSwapP", "ShenandoahWeakCompareAndSwapN", "ShenandoahCompareAndExchangeP", "ShenandoahCompareAndExchangeN", +#endif "StoreCM", - "ClearArray", "GetAndSetB", "GetAndSetS", "GetAndAddI", "GetAndSetI", "GetAndSetP", "GetAndAddB", "GetAndAddS", "GetAndAddL", "GetAndSetL", "GetAndSetN", - "LoadBarrierSlowReg", "LoadBarrierWeakSlowReg" +#if INCLUDE_ZGC + "LoadBarrierSlowReg", "ZGetAndSetP", "ZCompareAndSwapP", "ZCompareAndExchangeP", "ZWeakCompareAndSwapP", +#endif + "ClearArray" }; int cnt = sizeof(needs_ideal_memory_list)/sizeof(char*); if( strcmp(_opType,"PrefetchAllocation")==0 ) diff -r ea43db53de91 -r ed12027517c0 src/hotspot/share/compiler/compilerDirectives.hpp --- a/src/hotspot/share/compiler/compilerDirectives.hpp Mon Jun 10 05:09:52 2019 +0200 +++ b/src/hotspot/share/compiler/compilerDirectives.hpp Thu Feb 14 14:54:05 2019 +0100 @@ -67,7 +67,7 @@ cflags(CloneMapDebug, bool, false, CloneMapDebug) \ cflags(IGVPrintLevel, intx, PrintIdealGraphLevel, IGVPrintLevel) \ cflags(MaxNodeLimit, intx, MaxNodeLimit, MaxNodeLimit) \ -ZGC_ONLY(cflags(ZOptimizeLoadBarriers, bool, ZOptimizeLoadBarriers, ZOptimizeLoadBarriers)) +ZGC_ONLY(cflags(ZTraceLoadBarriers, bool, false, ZTraceLoadBarriers)) #else #define compilerdirectives_c2_flags(cflags) #endif diff -r ea43db53de91 -r ed12027517c0 src/hotspot/share/gc/shared/c2/barrierSetC2.hpp --- a/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp Mon Jun 10 05:09:52 2019 +0200 +++ b/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp Thu Feb 14 14:54:05 2019 +0100 @@ -259,6 +259,7 @@ Optimization, Expansion }; + virtual bool array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type, bool is_clone, ArrayCopyPhase phase) const { return false; } virtual void clone_barrier_at_expansion(ArrayCopyNode* ac, Node* call, PhaseIterGVN& igvn) const; @@ -273,7 +274,6 @@ virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const { } virtual void enqueue_useful_gc_barrier(PhaseIterGVN* igvn, Node* node) const {} virtual void eliminate_useless_gc_barriers(Unique_Node_List &useful, Compile* C) const {} - virtual void add_users_to_worklist(Unique_Node_List* worklist) const {} // Allow barrier sets to have shared state that is preserved across a compilation unit. // This could for example comprise macro nodes to be expanded during macro expansion. @@ -286,17 +286,21 @@ virtual bool is_gc_specific_loop_opts_pass(LoopOptsMode mode) const { return false; } virtual bool has_special_unique_user(const Node* node) const { return false; } + virtual bool needs_anti_dependence_check(const Node* node) const { return true; } + + virtual void barrier_insertion_phase(Compile* C, PhaseIterGVN &igvn) const { } enum CompilePhase { - BeforeOptimize, /* post_parse = true */ - BeforeExpand, /* post_parse = false */ + BeforeOptimize, + BeforeLateInsertion, + BeforeMacroExpand, BeforeCodeGen }; - virtual void verify_gc_barriers(Compile* compile, CompilePhase phase) const {} virtual bool flatten_gc_alias_type(const TypePtr*& adr_type) const { return false; } #ifdef ASSERT virtual bool verify_gc_alias_type(const TypePtr* adr_type, int offset) const { return false; } + virtual void verify_gc_barriers(Compile* compile, CompilePhase phase) const {} #endif virtual bool final_graph_reshaping(Compile* compile, Node* n, uint opcode) const { return false; } @@ -310,8 +314,8 @@ virtual bool matcher_find_shared_post_visit(Matcher* matcher, Node* n, uint opcode) const { return false; }; virtual bool matcher_is_store_load_barrier(Node* x, uint xop) const { return false; } - virtual void igvn_add_users_to_worklist(PhaseIterGVN* igvn, Node* use) const {} - virtual void ccp_analyze(PhaseCCP* ccp, Unique_Node_List& worklist, Node* use) const {} + virtual void igvn_add_users_to_worklist(PhaseIterGVN* igvn, Node* use) const { } + virtual void ccp_analyze(PhaseCCP* ccp, Unique_Node_List& worklist, Node* use) const { } virtual Node* split_if_pre(PhaseIdealLoop* phase, Node* n) const { return NULL; } virtual bool build_loop_late_post(PhaseIdealLoop* phase, Node* n) const { return false; } diff -r ea43db53de91 -r ed12027517c0 src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp --- a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp Mon Jun 10 05:09:52 2019 +0200 +++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp Thu Feb 14 14:54:05 2019 +0100 @@ -912,8 +912,6 @@ } } -void ShenandoahBarrierSetC2::add_users_to_worklist(Unique_Node_List* worklist) const {} - void* ShenandoahBarrierSetC2::create_barrier_state(Arena* comp_arena) const { return new(comp_arena) ShenandoahBarrierSetC2State(comp_arena); } @@ -928,7 +926,7 @@ #ifdef ASSERT void ShenandoahBarrierSetC2::verify_gc_barriers(Compile* compile, CompilePhase phase) const { - if (ShenandoahVerifyOptoBarriers && phase == BarrierSetC2::BeforeExpand) { + if (ShenandoahVerifyOptoBarriers && phase == BarrierSetC2::BeforeMacroExpand) { ShenandoahBarrierC2Support::verify(Compile::current()->root()); } else if (phase == BarrierSetC2::BeforeCodeGen) { // Verify G1 pre-barriers diff -r ea43db53de91 -r ed12027517c0 src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp --- a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp Mon Jun 10 05:09:52 2019 +0200 +++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp Thu Feb 14 14:54:05 2019 +0100 @@ -126,7 +126,6 @@ virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const; virtual void enqueue_useful_gc_barrier(PhaseIterGVN* igvn, Node* node) const; virtual void eliminate_useless_gc_barriers(Unique_Node_List &useful, Compile* C) const; - virtual void add_users_to_worklist(Unique_Node_List* worklist) const; // Allow barrier sets to have shared state that is preserved across a compilation unit. // This could for example comprise macro nodes to be expanded during macro expansion. diff -r ea43db53de91 -r ed12027517c0 src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp --- a/src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp Mon Jun 10 05:09:52 2019 +0200 +++ b/src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp Thu Feb 14 14:54:05 2019 +0100 @@ -22,15 +22,22 @@ */ #include "precompiled.hpp" +#include "opto/castnode.hpp" #include "opto/compile.hpp" -#include "opto/castnode.hpp" #include "opto/escape.hpp" #include "opto/graphKit.hpp" -#include "opto/idealKit.hpp" #include "opto/loopnode.hpp" +#include "opto/machnode.hpp" #include "opto/macro.hpp" +#include "opto/memnode.hpp" +#include "opto/movenode.hpp" #include "opto/node.hpp" +#include "opto/phase.hpp" +#include "opto/phaseX.hpp" +#include "opto/rootnode.hpp" #include "opto/type.hpp" +#include "utilities/copy.hpp" +#include "utilities/growableArray.hpp" #include "utilities/macros.hpp" #include "gc/z/zBarrierSet.hpp" #include "gc/z/c2/zBarrierSetC2.hpp" @@ -84,7 +91,7 @@ if (node->is_Phi()) { PhiNode* phi = node->as_Phi(); Node* n = phi->in(1); - if (n != NULL && (n->is_LoadBarrierSlowReg() || n->is_LoadBarrierWeakSlowReg())) { + if (n != NULL && n->is_LoadBarrierSlowReg()) { return true; } } @@ -121,50 +128,19 @@ } } -void ZBarrierSetC2::find_dominating_barriers(PhaseIterGVN& igvn) { - // Look for dominating barriers on the same address only once all - // other loop opts are over. Loop opts may cause a safepoint to be - // inserted between a barrier and its dominating barrier. - Compile* C = Compile::current(); - ZBarrierSetC2* bs = (ZBarrierSetC2*)BarrierSet::barrier_set()->barrier_set_c2(); - ZBarrierSetC2State* s = bs->state(); - if (s->load_barrier_count() >= 2) { - Compile::TracePhase tp("idealLoop", &C->timers[Phase::_t_idealLoop]); - PhaseIdealLoop::optimize(igvn, LoopOptsLastRound); - if (C->major_progress()) C->print_method(PHASE_PHASEIDEALLOOP_ITERATIONS, 2); - } -} - -void ZBarrierSetC2::add_users_to_worklist(Unique_Node_List* worklist) const { - // Permanent temporary workaround - // Loadbarriers may have non-obvious dead uses keeping them alive during parsing. The use is - // removed by RemoveUseless (after parsing, before optimize) but the barriers won't be added to - // the worklist. Unless we add them explicitly they are not guaranteed to end up there. - ZBarrierSetC2State* s = state(); +static bool load_require_barrier(LoadNode* load) { return ((load->barrier_data() & RequireBarrier) != 0); } +static bool load_has_weak_barrier(LoadNode* load) { return ((load->barrier_data() & WeakBarrier) != 0); } +static bool load_has_expanded_barrier(LoadNode* load) { return ((load->barrier_data() & ExpandedBarrier) != 0); } +static void load_set_expanded_barrier(LoadNode* load) { return load->set_barrier_data(ExpandedBarrier); } - for (int i = 0; i < s->load_barrier_count(); i++) { - LoadBarrierNode* n = s->load_barrier_node(i); - worklist->push(n); +static void load_set_barrier(LoadNode* load, bool weak) { + if (weak) { + load->set_barrier_data(WeakBarrier); + } else { + load->set_barrier_data(RequireBarrier); } } -const TypeFunc* ZBarrierSetC2::load_barrier_Type() const { - const Type** fields; - - // Create input types (domain) - fields = TypeTuple::fields(2); - fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; - fields[TypeFunc::Parms+1] = TypeOopPtr::BOTTOM; - const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields); - - // Create result type (range) - fields = TypeTuple::fields(1); - fields[TypeFunc::Parms+0] = TypeInstPtr::BOTTOM; - const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1, fields); - - return TypeFunc::make(domain, range); -} - // == LoadBarrierNode == LoadBarrierNode::LoadBarrierNode(Compile* C, @@ -172,13 +148,9 @@ Node* mem, Node* val, Node* adr, - bool weak, - bool writeback, - bool oop_reload_allowed) : + bool weak) : MultiNode(Number_of_Inputs), - _weak(weak), - _writeback(writeback), - _oop_reload_allowed(oop_reload_allowed) { + _weak(weak) { init_req(Control, c); init_req(Memory, mem); init_req(Oop, val); @@ -217,8 +189,8 @@ const Type** floadbarrier = (const Type **)(phase->C->type_arena()->Amalloc_4((Number_of_Outputs)*sizeof(Type*))); const Type* val_t = phase->type(in(Oop)); floadbarrier[Control] = Type::CONTROL; - floadbarrier[Memory] = Type::MEMORY; - floadbarrier[Oop] = val_t; + floadbarrier[Memory] = Type::MEMORY; + floadbarrier[Oop] = val_t; return TypeTuple::make(Number_of_Outputs, floadbarrier); } @@ -238,6 +210,11 @@ } LoadBarrierNode* LoadBarrierNode::has_dominating_barrier(PhaseIdealLoop* phase, bool linear_only, bool look_for_similar) { + if (is_weak()) { + // Weak barriers can't be eliminated + return NULL; + } + Node* val = in(LoadBarrierNode::Oop); if (in(Similar)->is_Proj() && in(Similar)->in(0)->is_LoadBarrier()) { LoadBarrierNode* lb = in(Similar)->in(0)->as_LoadBarrier(); @@ -266,7 +243,7 @@ } } - if (ZVerifyLoadBarriers || can_be_eliminated()) { + if (can_be_eliminated()) { return NULL; } @@ -316,7 +293,7 @@ } if (ok) { assert(dom_found, ""); - return u->as_LoadBarrier();; + return u->as_LoadBarrier(); } break; } @@ -328,6 +305,7 @@ void LoadBarrierNode::push_dominated_barriers(PhaseIterGVN* igvn) const { // Change to that barrier may affect a dominated barrier so re-push those + assert(!is_weak(), "sanity"); Node* val = in(LoadBarrierNode::Oop); for (DUIterator_Fast imax, i = val->fast_outs(imax); i < imax; i++) { @@ -355,13 +333,9 @@ } Node *LoadBarrierNode::Identity(PhaseGVN *phase) { - if (!phase->C->directive()->ZOptimizeLoadBarriersOption) { - return this; - } - - bool redundant_addr = false; LoadBarrierNode* dominating_barrier = has_dominating_barrier(NULL, true, false); if (dominating_barrier != NULL) { + assert(!is_weak(), "Weak barriers cant be eliminated"); assert(dominating_barrier->in(Oop) == in(Oop), ""); return dominating_barrier; } @@ -374,33 +348,33 @@ return this; } - Node* val = in(Oop); - Node* mem = in(Memory); - Node* ctrl = in(Control); - Node* adr = in(Address); + Node *val = in(Oop); + Node *mem = in(Memory); + Node *ctrl = in(Control); + assert(val->Opcode() != Op_LoadN, ""); + assert(val->Opcode() != Op_DecodeN, ""); if (mem->is_MergeMem()) { - Node* new_mem = mem->as_MergeMem()->memory_at(Compile::AliasIdxRaw); + Node *new_mem = mem->as_MergeMem()->memory_at(Compile::AliasIdxRaw); set_req(Memory, new_mem); if (mem->outcnt() == 0 && can_reshape) { phase->is_IterGVN()->_worklist.push(mem); } - return this; } - bool optimizeLoadBarriers = phase->C->directive()->ZOptimizeLoadBarriersOption; - LoadBarrierNode* dominating_barrier = optimizeLoadBarriers ? has_dominating_barrier(NULL, !can_reshape, !phase->C->major_progress()) : NULL; - if (dominating_barrier != NULL && dominating_barrier->in(Oop) != in(Oop)) { - assert(in(Address) == dominating_barrier->in(Address), ""); - set_req(Similar, dominating_barrier->proj_out(Oop)); - return this; + LoadBarrierNode *dominating_barrier = NULL; + if (!is_weak()) { + dominating_barrier = has_dominating_barrier(NULL, !can_reshape, !phase->C->major_progress()); + if (dominating_barrier != NULL && dominating_barrier->in(Oop) != in(Oop)) { + assert(in(Address) == dominating_barrier->in(Address), ""); + set_req(Similar, dominating_barrier->proj_out(Oop)); + return this; + } } - bool eliminate = (optimizeLoadBarriers && !(val->is_Phi() || val->Opcode() == Op_LoadP || val->Opcode() == Op_GetAndSetP || val->is_DecodeN())) || - (can_reshape && (dominating_barrier != NULL || !has_true_uses())); - + bool eliminate = can_reshape && (dominating_barrier != NULL || !has_true_uses()); if (eliminate) { if (can_reshape) { PhaseIterGVN* igvn = phase->is_IterGVN(); @@ -415,13 +389,13 @@ fix_similar_in_uses(igvn); if (out_res != NULL) { if (dominating_barrier != NULL) { + assert(!is_weak(), "Sanity"); igvn->replace_node(out_res, dominating_barrier->proj_out(Oop)); } else { igvn->replace_node(out_res, val); } } } - return new ConINode(TypeInt::ZERO); } @@ -432,7 +406,7 @@ return this; } - if (can_reshape) { + if (can_reshape && !is_weak()) { // If this barrier is linked through the Similar edge by a // dominated barrier and both barriers have the same Oop field, // the dominated barrier can go away, so push it for reprocessing. @@ -446,10 +420,10 @@ Node* u = out_res->fast_out(i); if (u->is_LoadBarrier() && u->in(Similar) == out_res && (u->in(Oop) == val || !u->in(Similar)->is_top())) { + assert(!u->as_LoadBarrier()->is_weak(), "Sanity"); igvn->_worklist.push(u); } } - push_dominated_barriers(igvn); } @@ -479,211 +453,17 @@ bool LoadBarrierNode::has_true_uses() const { Node* out_res = proj_out_or_null(Oop); - if (out_res == NULL) { - return false; - } - - for (DUIterator_Fast imax, i = out_res->fast_outs(imax); i < imax; i++) { - Node* u = out_res->fast_out(i); - if (!u->is_LoadBarrier() || u->in(Similar) != out_res) { - return true; + if (out_res != NULL) { + for (DUIterator_Fast imax, i = out_res->fast_outs(imax); i < imax; i++) { + Node *u = out_res->fast_out(i); + if (!u->is_LoadBarrier() || u->in(Similar) != out_res) { + return true; + } } } - return false; } -// == Accesses == - -Node* ZBarrierSetC2::make_cas_loadbarrier(C2AtomicParseAccess& access) const { - assert(!UseCompressedOops, "Not allowed"); - CompareAndSwapNode* cas = (CompareAndSwapNode*)access.raw_access(); - PhaseGVN& gvn = access.gvn(); - Compile* C = Compile::current(); - GraphKit* kit = access.kit(); - - Node* in_ctrl = cas->in(MemNode::Control); - Node* in_mem = cas->in(MemNode::Memory); - Node* in_adr = cas->in(MemNode::Address); - Node* in_val = cas->in(MemNode::ValueIn); - Node* in_expected = cas->in(LoadStoreConditionalNode::ExpectedIn); - - float likely = PROB_LIKELY(0.999); - - const TypePtr *adr_type = gvn.type(in_adr)->isa_ptr(); - Compile::AliasType* alias_type = C->alias_type(adr_type); - int alias_idx = C->get_alias_index(adr_type); - - // Outer check - true: continue, false: load and check - Node* region = new RegionNode(3); - Node* phi = new PhiNode(region, TypeInt::BOOL); - Node* phi_mem = new PhiNode(region, Type::MEMORY, adr_type); - - // Inner check - is the healed ref equal to the expected - Node* region2 = new RegionNode(3); - Node* phi2 = new PhiNode(region2, TypeInt::BOOL); - Node* phi_mem2 = new PhiNode(region2, Type::MEMORY, adr_type); - - // CAS node returns 0 or 1 - Node* cmp = gvn.transform(new CmpINode(cas, kit->intcon(0))); - Node* bol = gvn.transform(new BoolNode(cmp, BoolTest::ne))->as_Bool(); - IfNode* iff = gvn.transform(new IfNode(in_ctrl, bol, likely, COUNT_UNKNOWN))->as_If(); - Node* then = gvn.transform(new IfTrueNode(iff)); - Node* elsen = gvn.transform(new IfFalseNode(iff)); - - Node* scmemproj1 = gvn.transform(new SCMemProjNode(cas)); - - kit->set_memory(scmemproj1, alias_idx); - phi_mem->init_req(1, scmemproj1); - phi_mem2->init_req(2, scmemproj1); - - // CAS fail - reload and heal oop - Node* reload = kit->make_load(elsen, in_adr, TypeOopPtr::BOTTOM, T_OBJECT, MemNode::unordered); - Node* barrier = gvn.transform(new LoadBarrierNode(C, elsen, scmemproj1, reload, in_adr, false, true, false)); - Node* barrierctrl = gvn.transform(new ProjNode(barrier, LoadBarrierNode::Control)); - Node* barrierdata = gvn.transform(new ProjNode(barrier, LoadBarrierNode::Oop)); - - // Check load - Node* tmpX = gvn.transform(new CastP2XNode(NULL, barrierdata)); - Node* in_expX = gvn.transform(new CastP2XNode(NULL, in_expected)); - Node* cmp2 = gvn.transform(new CmpXNode(tmpX, in_expX)); - Node *bol2 = gvn.transform(new BoolNode(cmp2, BoolTest::ne))->as_Bool(); - IfNode* iff2 = gvn.transform(new IfNode(barrierctrl, bol2, likely, COUNT_UNKNOWN))->as_If(); - Node* then2 = gvn.transform(new IfTrueNode(iff2)); - Node* elsen2 = gvn.transform(new IfFalseNode(iff2)); - - // redo CAS - Node* cas2 = gvn.transform(new CompareAndSwapPNode(elsen2, kit->memory(alias_idx), in_adr, in_val, in_expected, cas->order())); - Node* scmemproj2 = gvn.transform(new SCMemProjNode(cas2)); - kit->set_control(elsen2); - kit->set_memory(scmemproj2, alias_idx); - - // Merge inner flow - check if healed oop was equal too expected. - region2->set_req(1, kit->control()); - region2->set_req(2, then2); - phi2->set_req(1, cas2); - phi2->set_req(2, kit->intcon(0)); - phi_mem2->init_req(1, scmemproj2); - kit->set_memory(phi_mem2, alias_idx); - - // Merge outer flow - then check if first CAS succeeded - region->set_req(1, then); - region->set_req(2, region2); - phi->set_req(1, kit->intcon(1)); - phi->set_req(2, phi2); - phi_mem->init_req(2, phi_mem2); - kit->set_memory(phi_mem, alias_idx); - - gvn.transform(region2); - gvn.transform(phi2); - gvn.transform(phi_mem2); - gvn.transform(region); - gvn.transform(phi); - gvn.transform(phi_mem); - - kit->set_control(region); - kit->insert_mem_bar(Op_MemBarCPUOrder); - - return phi; -} - -Node* ZBarrierSetC2::make_cmpx_loadbarrier(C2AtomicParseAccess& access) const { - CompareAndExchangePNode* cmpx = (CompareAndExchangePNode*)access.raw_access(); - GraphKit* kit = access.kit(); - PhaseGVN& gvn = kit->gvn(); - Compile* C = Compile::current(); - - Node* in_ctrl = cmpx->in(MemNode::Control); - Node* in_mem = cmpx->in(MemNode::Memory); - Node* in_adr = cmpx->in(MemNode::Address); - Node* in_val = cmpx->in(MemNode::ValueIn); - Node* in_expected = cmpx->in(LoadStoreConditionalNode::ExpectedIn); - - float likely = PROB_LIKELY(0.999); - - const TypePtr *adr_type = cmpx->get_ptr_type(); - Compile::AliasType* alias_type = C->alias_type(adr_type); - int alias_idx = C->get_alias_index(adr_type); - - // Outer check - true: continue, false: load and check - Node* region = new RegionNode(3); - Node* phi = new PhiNode(region, adr_type); - - // Inner check - is the healed ref equal to the expected - Node* region2 = new RegionNode(3); - Node* phi2 = new PhiNode(region2, adr_type); - - // Check if cmpx succeeded - Node* cmp = gvn.transform(new CmpPNode(cmpx, in_expected)); - Node* bol = gvn.transform(new BoolNode(cmp, BoolTest::eq))->as_Bool(); - IfNode* iff = gvn.transform(new IfNode(in_ctrl, bol, likely, COUNT_UNKNOWN))->as_If(); - Node* then = gvn.transform(new IfTrueNode(iff)); - Node* elsen = gvn.transform(new IfFalseNode(iff)); - - Node* scmemproj1 = gvn.transform(new SCMemProjNode(cmpx)); - kit->set_memory(scmemproj1, alias_idx); - - // CAS fail - reload and heal oop - Node* reload = kit->make_load(elsen, in_adr, TypeOopPtr::BOTTOM, T_OBJECT, MemNode::unordered); - Node* barrier = gvn.transform(new LoadBarrierNode(C, elsen, scmemproj1, reload, in_adr, false, true, false)); - Node* barrierctrl = gvn.transform(new ProjNode(barrier, LoadBarrierNode::Control)); - Node* barrierdata = gvn.transform(new ProjNode(barrier, LoadBarrierNode::Oop)); - - // Check load - Node* tmpX = gvn.transform(new CastP2XNode(NULL, barrierdata)); - Node* in_expX = gvn.transform(new CastP2XNode(NULL, in_expected)); - Node* cmp2 = gvn.transform(new CmpXNode(tmpX, in_expX)); - Node *bol2 = gvn.transform(new BoolNode(cmp2, BoolTest::ne))->as_Bool(); - IfNode* iff2 = gvn.transform(new IfNode(barrierctrl, bol2, likely, COUNT_UNKNOWN))->as_If(); - Node* then2 = gvn.transform(new IfTrueNode(iff2)); - Node* elsen2 = gvn.transform(new IfFalseNode(iff2)); - - // Redo CAS - Node* cmpx2 = gvn.transform(new CompareAndExchangePNode(elsen2, kit->memory(alias_idx), in_adr, in_val, in_expected, adr_type, cmpx->get_ptr_type(), cmpx->order())); - Node* scmemproj2 = gvn.transform(new SCMemProjNode(cmpx2)); - kit->set_control(elsen2); - kit->set_memory(scmemproj2, alias_idx); - - // Merge inner flow - check if healed oop was equal too expected. - region2->set_req(1, kit->control()); - region2->set_req(2, then2); - phi2->set_req(1, cmpx2); - phi2->set_req(2, barrierdata); - - // Merge outer flow - then check if first cas succeeded - region->set_req(1, then); - region->set_req(2, region2); - phi->set_req(1, cmpx); - phi->set_req(2, phi2); - - gvn.transform(region2); - gvn.transform(phi2); - gvn.transform(region); - gvn.transform(phi); - - kit->set_control(region); - kit->set_memory(in_mem, alias_idx); - kit->insert_mem_bar(Op_MemBarCPUOrder); - - return phi; -} - -Node* ZBarrierSetC2::load_barrier(GraphKit* kit, Node* val, Node* adr, bool weak, bool writeback, bool oop_reload_allowed) const { - PhaseGVN& gvn = kit->gvn(); - Node* barrier = new LoadBarrierNode(Compile::current(), kit->control(), kit->memory(TypeRawPtr::BOTTOM), val, adr, weak, writeback, oop_reload_allowed); - Node* transformed_barrier = gvn.transform(barrier); - - if (transformed_barrier->is_LoadBarrier()) { - if (barrier == transformed_barrier) { - kit->set_control(gvn.transform(new ProjNode(barrier, LoadBarrierNode::Control))); - } - Node* result = gvn.transform(new ProjNode(transformed_barrier, LoadBarrierNode::Oop)); - return result; - } else { - return val; - } -} - static bool barrier_needed(C2Access& access) { return ZBarrierSet::barrier_needed(access.decorators(), access.type()); } @@ -695,266 +475,58 @@ } bool weak = (access.decorators() & ON_WEAK_OOP_REF) != 0; - - assert(access.is_parse_access(), "entry not supported at optimization time"); - C2ParseAccess& parse_access = static_cast(access); - GraphKit* kit = parse_access.kit(); - PhaseGVN& gvn = kit->gvn(); - Node* adr = access.addr().node(); - Node* heap_base_oop = access.base(); - bool unsafe = (access.decorators() & C2_UNSAFE_ACCESS) != 0; - if (unsafe) { - if (!ZVerifyLoadBarriers) { - p = load_barrier(kit, p, adr); - } else { - if (!TypePtr::NULL_PTR->higher_equal(gvn.type(heap_base_oop))) { - p = load_barrier(kit, p, adr); - } else { - IdealKit ideal(kit); - IdealVariable res(ideal); -#define __ ideal. - __ declarations_done(); - __ set(res, p); - __ if_then(heap_base_oop, BoolTest::ne, kit->null(), PROB_UNLIKELY(0.999)); { - kit->sync_kit(ideal); - p = load_barrier(kit, p, adr); - __ set(res, p); - __ sync_kit(kit); - } __ end_if(); - kit->final_sync(ideal); - p = __ value(res); -#undef __ - } - } - return p; - } else { - return load_barrier(parse_access.kit(), p, access.addr().node(), weak, true, true); + if (p->isa_Load()) { + load_set_barrier(p->as_Load(), weak); } + return p; } Node* ZBarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val, Node* new_val, const Type* val_type) const { Node* result = BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, val_type); - if (!barrier_needed(access)) { - return result; + LoadStoreNode* lsn = result->as_LoadStore(); + if (barrier_needed(access)) { + lsn->set_has_barrier(); } - - access.set_needs_pinning(false); - return make_cmpx_loadbarrier(access); + return lsn; } Node* ZBarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val, Node* new_val, const Type* value_type) const { Node* result = BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type); - if (!barrier_needed(access)) { - return result; + LoadStoreNode* lsn = result->as_LoadStore(); + if (barrier_needed(access)) { + lsn->set_has_barrier(); } - - Node* load_store = access.raw_access(); - bool weak_cas = (access.decorators() & C2_WEAK_CMPXCHG) != 0; - bool expected_is_null = (expected_val->get_ptr_type() == TypePtr::NULL_PTR); - - if (!expected_is_null) { - if (weak_cas) { - access.set_needs_pinning(false); - load_store = make_cas_loadbarrier(access); - } else { - access.set_needs_pinning(false); - load_store = make_cas_loadbarrier(access); - } - } - - return load_store; + return lsn; } Node* ZBarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* val_type) const { Node* result = BarrierSetC2::atomic_xchg_at_resolved(access, new_val, val_type); - if (!barrier_needed(access)) { - return result; + LoadStoreNode* lsn = result->as_LoadStore(); + if (barrier_needed(access)) { + lsn->set_has_barrier(); } - - Node* load_store = access.raw_access(); - Node* adr = access.addr().node(); - - assert(access.is_parse_access(), "entry not supported at optimization time"); - C2ParseAccess& parse_access = static_cast(access); - return load_barrier(parse_access.kit(), load_store, adr, false, false, false); + return lsn; } // == Macro Expansion == +// Optimized, low spill, loadbarrier variant using stub specialized on register used void ZBarrierSetC2::expand_loadbarrier_node(PhaseMacroExpand* phase, LoadBarrierNode* barrier) const { - Node* in_ctrl = barrier->in(LoadBarrierNode::Control); - Node* in_mem = barrier->in(LoadBarrierNode::Memory); - Node* in_val = barrier->in(LoadBarrierNode::Oop); - Node* in_adr = barrier->in(LoadBarrierNode::Address); - - Node* out_ctrl = barrier->proj_out(LoadBarrierNode::Control); - Node* out_res = barrier->proj_out(LoadBarrierNode::Oop); - - PhaseIterGVN &igvn = phase->igvn(); - - if (ZVerifyLoadBarriers) { - igvn.replace_node(out_res, in_val); - igvn.replace_node(out_ctrl, in_ctrl); - return; - } - - if (barrier->can_be_eliminated()) { - // Clone and pin the load for this barrier below the dominating - // barrier: the load cannot be allowed to float above the - // dominating barrier - Node* load = in_val; - - if (load->is_Load()) { - Node* new_load = load->clone(); - Node* addp = new_load->in(MemNode::Address); - assert(addp->is_AddP() || addp->is_Phi() || addp->is_Load(), "bad address"); - Node* cast = new CastPPNode(addp, igvn.type(addp), true); - Node* ctrl = NULL; - Node* similar = barrier->in(LoadBarrierNode::Similar); - if (similar->is_Phi()) { - // already expanded - ctrl = similar->in(0); - } else { - assert(similar->is_Proj() && similar->in(0)->is_LoadBarrier(), "unexpected graph shape"); - ctrl = similar->in(0)->as_LoadBarrier()->proj_out(LoadBarrierNode::Control); - } - assert(ctrl != NULL, "bad control"); - cast->set_req(0, ctrl); - igvn.transform(cast); - new_load->set_req(MemNode::Address, cast); - igvn.transform(new_load); - - igvn.replace_node(out_res, new_load); - igvn.replace_node(out_ctrl, in_ctrl); - return; - } - // cannot eliminate - } - - // There are two cases that require the basic loadbarrier - // 1) When the writeback of a healed oop must be avoided (swap) - // 2) When we must guarantee that no reload of is done (swap, cas, cmpx) - if (!barrier->is_writeback()) { - assert(!barrier->oop_reload_allowed(), "writeback barriers should be marked as requires oop"); - } - - if (!barrier->oop_reload_allowed()) { - expand_loadbarrier_basic(phase, barrier); - } else { - expand_loadbarrier_optimized(phase, barrier); - } -} - -// Basic loadbarrier using conventional argument passing -void ZBarrierSetC2::expand_loadbarrier_basic(PhaseMacroExpand* phase, LoadBarrierNode *barrier) const { PhaseIterGVN &igvn = phase->igvn(); - - Node* in_ctrl = barrier->in(LoadBarrierNode::Control); - Node* in_mem = barrier->in(LoadBarrierNode::Memory); - Node* in_val = barrier->in(LoadBarrierNode::Oop); - Node* in_adr = barrier->in(LoadBarrierNode::Address); - - Node* out_ctrl = barrier->proj_out(LoadBarrierNode::Control); - Node* out_res = barrier->proj_out(LoadBarrierNode::Oop); - float unlikely = PROB_UNLIKELY(0.999); - const Type* in_val_maybe_null_t = igvn.type(in_val); - - Node* jthread = igvn.transform(new ThreadLocalNode()); - Node* adr = phase->basic_plus_adr(jthread, in_bytes(ZThreadLocalData::address_bad_mask_offset())); - Node* bad_mask = igvn.transform(LoadNode::make(igvn, in_ctrl, in_mem, adr, TypeRawPtr::BOTTOM, TypeX_X, TypeX_X->basic_type(), MemNode::unordered)); - Node* cast = igvn.transform(new CastP2XNode(in_ctrl, in_val)); - Node* obj_masked = igvn.transform(new AndXNode(cast, bad_mask)); - Node* cmp = igvn.transform(new CmpXNode(obj_masked, igvn.zerocon(TypeX_X->basic_type()))); - Node *bol = igvn.transform(new BoolNode(cmp, BoolTest::ne))->as_Bool(); - IfNode* iff = igvn.transform(new IfNode(in_ctrl, bol, unlikely, COUNT_UNKNOWN))->as_If(); - Node* then = igvn.transform(new IfTrueNode(iff)); - Node* elsen = igvn.transform(new IfFalseNode(iff)); - - Node* result_region; - Node* result_val; - - result_region = new RegionNode(3); - result_val = new PhiNode(result_region, TypeInstPtr::BOTTOM); - - result_region->set_req(1, elsen); - Node* res = igvn.transform(new CastPPNode(in_val, in_val_maybe_null_t)); - res->init_req(0, elsen); - result_val->set_req(1, res); - - const TypeFunc *tf = load_barrier_Type(); - Node* call; - if (barrier->is_weak()) { - call = new CallLeafNode(tf, - ZBarrierSetRuntime::load_barrier_on_weak_oop_field_preloaded_addr(), - "ZBarrierSetRuntime::load_barrier_on_weak_oop_field_preloaded", - TypeRawPtr::BOTTOM); - } else { - call = new CallLeafNode(tf, - ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(), - "ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded", - TypeRawPtr::BOTTOM); - } - - call->init_req(TypeFunc::Control, then); - call->init_req(TypeFunc::I_O , phase->top()); - call->init_req(TypeFunc::Memory , in_mem); - call->init_req(TypeFunc::FramePtr, phase->top()); - call->init_req(TypeFunc::ReturnAdr, phase->top()); - call->init_req(TypeFunc::Parms+0, in_val); - if (barrier->is_writeback()) { - call->init_req(TypeFunc::Parms+1, in_adr); - } else { - // When slow path is called with a null address, the healed oop will not be written back - call->init_req(TypeFunc::Parms+1, igvn.zerocon(T_OBJECT)); - } - call = igvn.transform(call); - - Node* ctrl = igvn.transform(new ProjNode(call, TypeFunc::Control)); - res = igvn.transform(new ProjNode(call, TypeFunc::Parms)); - res = igvn.transform(new CheckCastPPNode(ctrl, res, in_val_maybe_null_t)); - - result_region->set_req(2, ctrl); - result_val->set_req(2, res); - - result_region = igvn.transform(result_region); - result_val = igvn.transform(result_val); - - if (out_ctrl != NULL) { // Added if cond - igvn.replace_node(out_ctrl, result_region); - } - igvn.replace_node(out_res, result_val); -} - -// Optimized, low spill, loadbarrier variant using stub specialized on register used -void ZBarrierSetC2::expand_loadbarrier_optimized(PhaseMacroExpand* phase, LoadBarrierNode *barrier) const { - PhaseIterGVN &igvn = phase->igvn(); -#ifdef PRINT_NODE_TRAVERSALS - Node* preceding_barrier_node = barrier->in(LoadBarrierNode::Oop); -#endif Node* in_ctrl = barrier->in(LoadBarrierNode::Control); Node* in_mem = barrier->in(LoadBarrierNode::Memory); Node* in_val = barrier->in(LoadBarrierNode::Oop); Node* in_adr = barrier->in(LoadBarrierNode::Address); - Node* out_ctrl = barrier->proj_out(LoadBarrierNode::Control); + Node* out_ctrl = barrier->proj_out_or_null(LoadBarrierNode::Control); Node* out_res = barrier->proj_out(LoadBarrierNode::Oop); assert(barrier->in(LoadBarrierNode::Oop) != NULL, "oop to loadbarrier node cannot be null"); -#ifdef PRINT_NODE_TRAVERSALS - tty->print("\n\n\nBefore barrier optimization:\n"); - traverse(barrier, out_ctrl, out_res, -1); - - tty->print("\nBefore barrier optimization: preceding_barrier_node\n"); - traverse(preceding_barrier_node, out_ctrl, out_res, -1); -#endif - - float unlikely = PROB_UNLIKELY(0.999); - Node* jthread = igvn.transform(new ThreadLocalNode()); Node* adr = phase->basic_plus_adr(jthread, in_bytes(ZThreadLocalData::address_bad_mask_offset())); Node* bad_mask = igvn.transform(LoadNode::make(igvn, in_ctrl, in_mem, adr, @@ -968,17 +540,9 @@ Node* then = igvn.transform(new IfTrueNode(iff)); Node* elsen = igvn.transform(new IfFalseNode(iff)); - Node* slow_path_surrogate; - if (!barrier->is_weak()) { - slow_path_surrogate = igvn.transform(new LoadBarrierSlowRegNode(then, in_mem, in_adr, in_val->adr_type(), - (const TypePtr*) in_val->bottom_type(), MemNode::unordered)); - } else { - slow_path_surrogate = igvn.transform(new LoadBarrierWeakSlowRegNode(then, in_mem, in_adr, in_val->adr_type(), - (const TypePtr*) in_val->bottom_type(), MemNode::unordered)); - } + Node* new_loadp = igvn.transform(new LoadBarrierSlowRegNode(then, in_mem, in_adr, in_val->adr_type(), + (const TypePtr*) in_val->bottom_type(), MemNode::unordered, barrier->is_weak())); - Node *new_loadp; - new_loadp = slow_path_surrogate; // Create the final region/phi pair to converge cntl/data paths to downstream code Node* result_region = igvn.transform(new RegionNode(3)); result_region->set_req(1, then); @@ -988,37 +552,28 @@ result_phi->set_req(1, new_loadp); result_phi->set_req(2, barrier->in(LoadBarrierNode::Oop)); - // Finally, connect the original outputs to the barrier region and phi to complete the expansion/substitution - // igvn.replace_node(out_ctrl, result_region); - if (out_ctrl != NULL) { // added if cond + if (out_ctrl != NULL) { igvn.replace_node(out_ctrl, result_region); } igvn.replace_node(out_res, result_phi); assert(barrier->outcnt() == 0,"LoadBarrier macro node has non-null outputs after expansion!"); -#ifdef PRINT_NODE_TRAVERSALS - tty->print("\nAfter barrier optimization: old out_ctrl\n"); - traverse(out_ctrl, out_ctrl, out_res, -1); - tty->print("\nAfter barrier optimization: old out_res\n"); - traverse(out_res, out_ctrl, out_res, -1); - tty->print("\nAfter barrier optimization: old barrier\n"); - traverse(barrier, out_ctrl, out_res, -1); - tty->print("\nAfter barrier optimization: preceding_barrier_node\n"); - traverse(preceding_barrier_node, result_region, result_phi, -1); -#endif + igvn.remove_dead_node(barrier); + igvn.remove_dead_node(out_ctrl); + igvn.remove_dead_node(out_res); assert(is_gc_barrier_node(result_phi), "sanity"); assert(step_over_gc_barrier(result_phi) == in_val, "sanity"); + + phase->C->print_method(PHASE_BARRIER_EXPANSION, 4, barrier->_idx); } bool ZBarrierSetC2::expand_barriers(Compile* C, PhaseIterGVN& igvn) const { ZBarrierSetC2State* s = state(); if (s->load_barrier_count() > 0) { PhaseMacroExpand macro(igvn); -#ifdef ASSERT - verify_gc_barriers(false); -#endif + int skipped = 0; while (s->load_barrier_count() > skipped) { int load_barrier_count = s->load_barrier_count(); @@ -1059,374 +614,6 @@ return false; } -// == Loop optimization == - -static bool replace_with_dominating_barrier(PhaseIdealLoop* phase, LoadBarrierNode* lb, bool last_round) { - PhaseIterGVN &igvn = phase->igvn(); - Compile* C = Compile::current(); - - LoadBarrierNode* lb2 = lb->has_dominating_barrier(phase, false, last_round); - if (lb2 == NULL) { - return false; - } - - if (lb->in(LoadBarrierNode::Oop) != lb2->in(LoadBarrierNode::Oop)) { - assert(lb->in(LoadBarrierNode::Address) == lb2->in(LoadBarrierNode::Address), "Invalid address"); - igvn.replace_input_of(lb, LoadBarrierNode::Similar, lb2->proj_out(LoadBarrierNode::Oop)); - C->set_major_progress(); - return false; - } - - // That transformation may cause the Similar edge on dominated load barriers to be invalid - lb->fix_similar_in_uses(&igvn); - - Node* val = lb->proj_out(LoadBarrierNode::Oop); - assert(lb2->has_true_uses(), "Invalid uses"); - assert(lb2->in(LoadBarrierNode::Oop) == lb->in(LoadBarrierNode::Oop), "Invalid oop"); - phase->lazy_update(lb, lb->in(LoadBarrierNode::Control)); - phase->lazy_replace(lb->proj_out(LoadBarrierNode::Control), lb->in(LoadBarrierNode::Control)); - igvn.replace_node(val, lb2->proj_out(LoadBarrierNode::Oop)); - - return true; -} - -static Node* find_dominating_memory(PhaseIdealLoop* phase, Node* mem, Node* dom, int i) { - assert(dom->is_Region() || i == -1, ""); - - Node* m = mem; - while(phase->is_dominator(dom, phase->has_ctrl(m) ? phase->get_ctrl(m) : m->in(0))) { - if (m->is_Mem()) { - assert(m->as_Mem()->adr_type() == TypeRawPtr::BOTTOM, ""); - m = m->in(MemNode::Memory); - } else if (m->is_MergeMem()) { - m = m->as_MergeMem()->memory_at(Compile::AliasIdxRaw); - } else if (m->is_Phi()) { - if (m->in(0) == dom && i != -1) { - m = m->in(i); - break; - } else { - m = m->in(LoopNode::EntryControl); - } - } else if (m->is_Proj()) { - m = m->in(0); - } else if (m->is_SafePoint() || m->is_MemBar()) { - m = m->in(TypeFunc::Memory); - } else { -#ifdef ASSERT - m->dump(); -#endif - ShouldNotReachHere(); - } - } - - return m; -} - -static LoadBarrierNode* clone_load_barrier(PhaseIdealLoop* phase, LoadBarrierNode* lb, Node* ctl, Node* mem, Node* oop_in) { - PhaseIterGVN &igvn = phase->igvn(); - Compile* C = Compile::current(); - Node* the_clone = lb->clone(); - the_clone->set_req(LoadBarrierNode::Control, ctl); - the_clone->set_req(LoadBarrierNode::Memory, mem); - if (oop_in != NULL) { - the_clone->set_req(LoadBarrierNode::Oop, oop_in); - } - - LoadBarrierNode* new_lb = the_clone->as_LoadBarrier(); - igvn.register_new_node_with_optimizer(new_lb); - IdealLoopTree *loop = phase->get_loop(new_lb->in(0)); - phase->set_ctrl(new_lb, new_lb->in(0)); - phase->set_loop(new_lb, loop); - phase->set_idom(new_lb, new_lb->in(0), phase->dom_depth(new_lb->in(0))+1); - if (!loop->_child) { - loop->_body.push(new_lb); - } - - Node* proj_ctl = new ProjNode(new_lb, LoadBarrierNode::Control); - igvn.register_new_node_with_optimizer(proj_ctl); - phase->set_ctrl(proj_ctl, proj_ctl->in(0)); - phase->set_loop(proj_ctl, loop); - phase->set_idom(proj_ctl, new_lb, phase->dom_depth(new_lb)+1); - if (!loop->_child) { - loop->_body.push(proj_ctl); - } - - Node* proj_oop = new ProjNode(new_lb, LoadBarrierNode::Oop); - phase->register_new_node(proj_oop, new_lb); - - if (!new_lb->in(LoadBarrierNode::Similar)->is_top()) { - LoadBarrierNode* similar = new_lb->in(LoadBarrierNode::Similar)->in(0)->as_LoadBarrier(); - if (!phase->is_dominator(similar, ctl)) { - igvn.replace_input_of(new_lb, LoadBarrierNode::Similar, C->top()); - } - } - - return new_lb; -} - -static void replace_barrier(PhaseIdealLoop* phase, LoadBarrierNode* lb, Node* new_val) { - PhaseIterGVN &igvn = phase->igvn(); - Node* val = lb->proj_out(LoadBarrierNode::Oop); - igvn.replace_node(val, new_val); - phase->lazy_update(lb, lb->in(LoadBarrierNode::Control)); - phase->lazy_replace(lb->proj_out(LoadBarrierNode::Control), lb->in(LoadBarrierNode::Control)); -} - -static bool split_barrier_thru_phi(PhaseIdealLoop* phase, LoadBarrierNode* lb) { - PhaseIterGVN &igvn = phase->igvn(); - Compile* C = Compile::current(); - - if (lb->in(LoadBarrierNode::Oop)->is_Phi()) { - Node* oop_phi = lb->in(LoadBarrierNode::Oop); - - if ((oop_phi->req() != 3) || (oop_phi->in(2) == oop_phi)) { - // Ignore phis with only one input - return false; - } - - if (phase->is_dominator(phase->get_ctrl(lb->in(LoadBarrierNode::Address)), - oop_phi->in(0)) && phase->get_ctrl(lb->in(LoadBarrierNode::Address)) != oop_phi->in(0)) { - // That transformation may cause the Similar edge on dominated load barriers to be invalid - lb->fix_similar_in_uses(&igvn); - - RegionNode* region = oop_phi->in(0)->as_Region(); - - int backedge = LoopNode::LoopBackControl; - if (region->is_Loop() && region->in(backedge)->is_Proj() && region->in(backedge)->in(0)->is_If()) { - Node* c = region->in(backedge)->in(0)->in(0); - assert(c->unique_ctrl_out() == region->in(backedge)->in(0), ""); - Node* oop = lb->in(LoadBarrierNode::Oop)->in(backedge); - Node* oop_c = phase->has_ctrl(oop) ? phase->get_ctrl(oop) : oop; - if (!phase->is_dominator(oop_c, c)) { - return false; - } - } - - // If the node on the backedge above the phi is the node itself - we have a self loop. - // Don't clone - this will be folded later. - if (oop_phi->in(LoopNode::LoopBackControl) == lb->proj_out(LoadBarrierNode::Oop)) { - return false; - } - - bool is_strip_mined = region->is_CountedLoop() && region->as_CountedLoop()->is_strip_mined(); - Node *phi = oop_phi->clone(); - - for (uint i = 1; i < region->req(); i++) { - Node* ctrl = region->in(i); - if (ctrl != C->top()) { - assert(!phase->is_dominator(ctrl, region) || region->is_Loop(), ""); - - Node* mem = lb->in(LoadBarrierNode::Memory); - Node* m = find_dominating_memory(phase, mem, region, i); - - if (region->is_Loop() && i == LoopNode::LoopBackControl && ctrl->is_Proj() && ctrl->in(0)->is_If()) { - ctrl = ctrl->in(0)->in(0); - } else if (region->is_Loop() && is_strip_mined) { - // If this is a strip mined loop, control must move above OuterStripMinedLoop - assert(i == LoopNode::EntryControl, "check"); - assert(ctrl->is_OuterStripMinedLoop(), "sanity"); - ctrl = ctrl->as_OuterStripMinedLoop()->in(LoopNode::EntryControl); - } - - LoadBarrierNode* new_lb = clone_load_barrier(phase, lb, ctrl, m, lb->in(LoadBarrierNode::Oop)->in(i)); - Node* out_ctrl = new_lb->proj_out(LoadBarrierNode::Control); - - if (is_strip_mined && (i == LoopNode::EntryControl)) { - assert(region->in(i)->is_OuterStripMinedLoop(), ""); - igvn.replace_input_of(region->in(i), i, out_ctrl); - phase->set_idom(region->in(i), out_ctrl, phase->dom_depth(out_ctrl)); - } else if (ctrl == region->in(i)) { - igvn.replace_input_of(region, i, out_ctrl); - // Only update the idom if is the loop entry we are updating - // - A loop backedge doesn't change the idom - if (region->is_Loop() && i == LoopNode::EntryControl) { - phase->set_idom(region, out_ctrl, phase->dom_depth(out_ctrl)); - } - } else { - Node* iff = region->in(i)->in(0); - igvn.replace_input_of(iff, 0, out_ctrl); - phase->set_idom(iff, out_ctrl, phase->dom_depth(out_ctrl)+1); - } - phi->set_req(i, new_lb->proj_out(LoadBarrierNode::Oop)); - } - } - phase->register_new_node(phi, region); - replace_barrier(phase, lb, phi); - - if (region->is_Loop()) { - // Load barrier moved to the back edge of the Loop may now - // have a safepoint on the path to the barrier on the Similar - // edge - igvn.replace_input_of(phi->in(LoopNode::LoopBackControl)->in(0), LoadBarrierNode::Similar, C->top()); - Node* head = region->in(LoopNode::EntryControl); - phase->set_idom(region, head, phase->dom_depth(head)+1); - phase->recompute_dom_depth(); - if (head->is_CountedLoop() && head->as_CountedLoop()->is_main_loop()) { - head->as_CountedLoop()->set_normal_loop(); - } - } - - return true; - } - } - - return false; -} - -static bool move_out_of_loop(PhaseIdealLoop* phase, LoadBarrierNode* lb) { - PhaseIterGVN &igvn = phase->igvn(); - IdealLoopTree *lb_loop = phase->get_loop(lb->in(0)); - if (lb_loop != phase->ltree_root() && !lb_loop->_irreducible) { - Node* oop_ctrl = phase->get_ctrl(lb->in(LoadBarrierNode::Oop)); - IdealLoopTree *oop_loop = phase->get_loop(oop_ctrl); - IdealLoopTree* adr_loop = phase->get_loop(phase->get_ctrl(lb->in(LoadBarrierNode::Address))); - if (!lb_loop->is_member(oop_loop) && !lb_loop->is_member(adr_loop)) { - // That transformation may cause the Similar edge on dominated load barriers to be invalid - lb->fix_similar_in_uses(&igvn); - - Node* head = lb_loop->_head; - assert(head->is_Loop(), ""); - - if (phase->is_dominator(head, oop_ctrl)) { - assert(oop_ctrl->Opcode() == Op_CProj && oop_ctrl->in(0)->Opcode() == Op_NeverBranch, ""); - assert(lb_loop->is_member(phase->get_loop(oop_ctrl->in(0)->in(0))), ""); - return false; - } - - if (head->is_CountedLoop()) { - CountedLoopNode* cloop = head->as_CountedLoop(); - if (cloop->is_main_loop()) { - cloop->set_normal_loop(); - } - // When we are moving barrier out of a counted loop, - // make sure we move it all the way out of the strip mined outer loop. - if (cloop->is_strip_mined()) { - head = cloop->outer_loop(); - } - } - - Node* mem = lb->in(LoadBarrierNode::Memory); - Node* m = find_dominating_memory(phase, mem, head, -1); - - LoadBarrierNode* new_lb = clone_load_barrier(phase, lb, head->in(LoopNode::EntryControl), m, NULL); - - assert(phase->idom(head) == head->in(LoopNode::EntryControl), ""); - Node* proj_ctl = new_lb->proj_out(LoadBarrierNode::Control); - igvn.replace_input_of(head, LoopNode::EntryControl, proj_ctl); - phase->set_idom(head, proj_ctl, phase->dom_depth(proj_ctl) + 1); - - replace_barrier(phase, lb, new_lb->proj_out(LoadBarrierNode::Oop)); - - phase->recompute_dom_depth(); - - return true; - } - } - - return false; -} - -static bool common_barriers(PhaseIdealLoop* phase, LoadBarrierNode* lb) { - PhaseIterGVN &igvn = phase->igvn(); - Node* in_val = lb->in(LoadBarrierNode::Oop); - for (DUIterator_Fast imax, i = in_val->fast_outs(imax); i < imax; i++) { - Node* u = in_val->fast_out(i); - if (u != lb && u->is_LoadBarrier() && u->as_LoadBarrier()->has_true_uses()) { - Node* this_ctrl = lb->in(LoadBarrierNode::Control); - Node* other_ctrl = u->in(LoadBarrierNode::Control); - - Node* lca = phase->dom_lca(this_ctrl, other_ctrl); - Node* proj1 = NULL; - Node* proj2 = NULL; - bool ok = (lb->in(LoadBarrierNode::Address) == u->in(LoadBarrierNode::Address)); - - while (this_ctrl != lca && ok) { - if (this_ctrl->in(0) != NULL && - this_ctrl->in(0)->is_MultiBranch()) { - if (this_ctrl->in(0)->in(0) == lca) { - assert(proj1 == NULL, ""); - assert(this_ctrl->is_Proj(), ""); - proj1 = this_ctrl; - } else if (!(this_ctrl->in(0)->is_If() && this_ctrl->as_Proj()->is_uncommon_trap_if_pattern(Deoptimization::Reason_none))) { - ok = false; - } - } - this_ctrl = phase->idom(this_ctrl); - } - while (other_ctrl != lca && ok) { - if (other_ctrl->in(0) != NULL && - other_ctrl->in(0)->is_MultiBranch()) { - if (other_ctrl->in(0)->in(0) == lca) { - assert(other_ctrl->is_Proj(), ""); - assert(proj2 == NULL, ""); - proj2 = other_ctrl; - } else if (!(other_ctrl->in(0)->is_If() && other_ctrl->as_Proj()->is_uncommon_trap_if_pattern(Deoptimization::Reason_none))) { - ok = false; - } - } - other_ctrl = phase->idom(other_ctrl); - } - assert(proj1 == NULL || proj2 == NULL || proj1->in(0) == proj2->in(0), ""); - if (ok && proj1 && proj2 && proj1 != proj2 && proj1->in(0)->is_If()) { - // That transformation may cause the Similar edge on dominated load barriers to be invalid - lb->fix_similar_in_uses(&igvn); - u->as_LoadBarrier()->fix_similar_in_uses(&igvn); - - Node* split = lca->unique_ctrl_out(); - assert(split->in(0) == lca, ""); - - Node* mem = lb->in(LoadBarrierNode::Memory); - Node* m = find_dominating_memory(phase, mem, split, -1); - LoadBarrierNode* new_lb = clone_load_barrier(phase, lb, lca, m, NULL); - - Node* proj_ctl = new_lb->proj_out(LoadBarrierNode::Control); - igvn.replace_input_of(split, 0, new_lb->proj_out(LoadBarrierNode::Control)); - phase->set_idom(split, proj_ctl, phase->dom_depth(proj_ctl)+1); - - Node* proj_oop = new_lb->proj_out(LoadBarrierNode::Oop); - replace_barrier(phase, lb, proj_oop); - replace_barrier(phase, u->as_LoadBarrier(), proj_oop); - - phase->recompute_dom_depth(); - - return true; - } - } - } - - return false; -} - -void ZBarrierSetC2::loop_optimize_gc_barrier(PhaseIdealLoop* phase, Node* node, bool last_round) { - if (!Compile::current()->directive()->ZOptimizeLoadBarriersOption) { - return; - } - - if (!node->is_LoadBarrier()) { - return; - } - - if (!node->as_LoadBarrier()->has_true_uses()) { - return; - } - - if (replace_with_dominating_barrier(phase, node->as_LoadBarrier(), last_round)) { - return; - } - - if (split_barrier_thru_phi(phase, node->as_LoadBarrier())) { - return; - } - - if (move_out_of_loop(phase, node->as_LoadBarrier())) { - return; - } - - if (common_barriers(phase, node->as_LoadBarrier())) { - return; - } -} - Node* ZBarrierSetC2::step_over_gc_barrier(Node* c) const { Node* node = c; @@ -1444,7 +631,7 @@ if (node->is_Phi()) { PhiNode* phi = node->as_Phi(); Node* n = phi->in(1); - if (n != NULL && (n->is_LoadBarrierSlowReg() || n->is_LoadBarrierWeakSlowReg())) { + if (n != NULL && n->is_LoadBarrierSlowReg()) { assert(c == node, "projections from step 1 should only be seen before macro expansion"); return phi->in(2); } @@ -1458,35 +645,76 @@ } bool ZBarrierSetC2::final_graph_reshaping(Compile* compile, Node* n, uint opcode) const { - if (opcode != Op_LoadBarrierSlowReg && - opcode != Op_LoadBarrierWeakSlowReg) { - return false; - } - + switch (opcode) { + case Op_LoadBarrier: + assert(0, "There should be no load barriers left"); + case Op_ZGetAndSetP: + case Op_ZCompareAndExchangeP: + case Op_ZCompareAndSwapP: + case Op_ZWeakCompareAndSwapP: + case Op_LoadBarrierSlowReg: #ifdef ASSERT - if (VerifyOptoOopOffsets) { - MemNode* mem = n->as_Mem(); - // Check to see if address types have grounded out somehow. - const TypeInstPtr* tp = mem->in(MemNode::Address)->bottom_type()->isa_instptr(); - ciInstanceKlass* k = tp->klass()->as_instance_klass(); - bool oop_offset_is_sane = k->contains_field_offset(tp->offset()); - assert(!tp || oop_offset_is_sane, ""); + if (VerifyOptoOopOffsets) { + MemNode *mem = n->as_Mem(); + // Check to see if address types have grounded out somehow. + const TypeInstPtr *tp = mem->in(MemNode::Address)->bottom_type()->isa_instptr(); + ciInstanceKlass *k = tp->klass()->as_instance_klass(); + bool oop_offset_is_sane = k->contains_field_offset(tp->offset()); + assert(!tp || oop_offset_is_sane, ""); + } +#endif + return true; + default: + return false; } -#endif - - return true; } bool ZBarrierSetC2::matcher_find_shared_visit(Matcher* matcher, Matcher::MStack& mstack, Node* n, uint opcode, bool& mem_op, int& mem_addr_idx) const { - if (opcode == Op_CallLeaf && - (n->as_Call()->entry_point() == ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr() || - n->as_Call()->entry_point() == ZBarrierSetRuntime::load_barrier_on_weak_oop_field_preloaded_addr())) { - mem_op = true; - mem_addr_idx = TypeFunc::Parms + 1; - return true; + switch(opcode) { + case Op_CallLeaf: + if (n->as_Call()->entry_point() == ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr() || + n->as_Call()->entry_point() == ZBarrierSetRuntime::load_barrier_on_weak_oop_field_preloaded_addr()) { + mem_op = true; + mem_addr_idx = TypeFunc::Parms + 1; + return true; + } + return false; + default: + return false; } +} - return false; +bool ZBarrierSetC2::matcher_find_shared_post_visit(Matcher* matcher, Node* n, uint opcode) const { + switch(opcode) { + case Op_ZCompareAndExchangeP: + case Op_ZCompareAndSwapP: + case Op_ZWeakCompareAndSwapP: { + Node *mem = n->in(MemNode::Address); + Node *keepalive = n->in(5); + Node *pair1 = new BinaryNode(mem, keepalive); + + Node *newval = n->in(MemNode::ValueIn); + Node *oldval = n->in(LoadStoreConditionalNode::ExpectedIn); + Node *pair2 = new BinaryNode(oldval, newval); + + n->set_req(MemNode::Address, pair1); + n->set_req(MemNode::ValueIn, pair2); + n->del_req(5); + n->del_req(LoadStoreConditionalNode::ExpectedIn); + return true; + } + case Op_ZGetAndSetP: { + Node *keepalive = n->in(4); + Node *newval = n->in(MemNode::ValueIn); + Node *pair = new BinaryNode(newval, keepalive); + n->set_req(MemNode::ValueIn, pair); + n->del_req(4); + return true; + } + + default: + return false; + } } // == Verification == @@ -1519,16 +747,32 @@ } void ZBarrierSetC2::verify_gc_barriers(Compile* compile, CompilePhase phase) const { - if (phase == BarrierSetC2::BeforeCodeGen) return; - bool post_parse = phase == BarrierSetC2::BeforeOptimize; - verify_gc_barriers(post_parse); + switch(phase) { + case BarrierSetC2::BeforeOptimize: + case BarrierSetC2::BeforeLateInsertion: + assert(state()->load_barrier_count() == 0, "No barriers inserted yet"); + break; + case BarrierSetC2::BeforeMacroExpand: + // Barrier placement should be set by now. + verify_gc_barriers(false /*post_parse*/); + break; + case BarrierSetC2::BeforeCodeGen: + // Barriers has been fully expanded. + assert(state()->load_barrier_count() == 0, "No more macro barriers"); + break; + default: + assert(0, "Phase without verification"); + } } +// post_parse implies that there might be load barriers without uses after parsing +// That only applies when adding barriers at parse time. void ZBarrierSetC2::verify_gc_barriers(bool post_parse) const { ZBarrierSetC2State* s = state(); Compile* C = Compile::current(); ResourceMark rm; VectorSet visited(Thread::current()->resource_area()); + for (int i = 0; i < s->load_barrier_count(); i++) { LoadBarrierNode* n = s->load_barrier_node(i); @@ -1542,7 +786,7 @@ n->in(LoadBarrierNode::Similar)->in(0)->in(LoadBarrierNode::Oop) != n->in(LoadBarrierNode::Oop)), "broken similar edge"); - assert(post_parse || n->as_LoadBarrier()->has_true_uses(), + assert(n->as_LoadBarrier()->has_true_uses(), "found unneeded load barrier"); // Several load barrier nodes chained through their Similar edge @@ -1557,87 +801,834 @@ Unique_Node_List wq; Node* other = n->in(LoadBarrierNode::Similar)->in(0); wq.push(n); - bool ok = true; - bool dom_found = false; for (uint next = 0; next < wq.size(); ++next) { - Node *n = wq.at(next); - assert(n->is_CFG(), ""); - assert(!n->is_SafePoint(), ""); + Node *nn = wq.at(next); + assert(nn->is_CFG(), ""); + assert(!nn->is_SafePoint(), ""); - if (n == other) { + if (nn == other) { continue; } - if (n->is_Region()) { - for (uint i = 1; i < n->req(); i++) { - Node* m = n->in(i); + if (nn->is_Region()) { + for (uint i = 1; i < nn->req(); i++) { + Node* m = nn->in(i); if (m != NULL) { wq.push(m); } } } else { - Node* m = n->in(0); + Node* m = nn->in(0); if (m != NULL) { wq.push(m); } } } } + } +} - if (ZVerifyLoadBarriers) { - if ((n->is_Load() || n->is_LoadStore()) && n->bottom_type()->make_oopptr() != NULL) { - visited.Clear(); - bool found = look_for_barrier(n, post_parse, visited); - if (!found) { - n->dump(1); - n->dump(-3); - stringStream ss; - C->method()->print_short_name(&ss); - tty->print_cr("-%s-", ss.as_string()); - assert(found, ""); +#endif // end verification code + +static void call_catch_cleanup_one(PhaseIdealLoop* phase, LoadNode* load, Node* ctrl); + +// This code is cloning all uses of a load that is between a call and the catch blocks, +// to each use. + +static bool fixup_uses_in_catch(PhaseIdealLoop *phase, Node *start_ctrl, Node *node) { + + if (!phase->has_ctrl(node)) { + // This node is floating - doesn't need to be cloned. + assert(node != start_ctrl, "check"); + return false; + } + + Node* ctrl = phase->get_ctrl(node); + if (ctrl != start_ctrl) { + // We are in a successor block - the node is ok. + return false; // Unwind + } + + // Process successor nodes + int outcnt = node->outcnt(); + for (int i = 0; i < outcnt; i++) { + Node* n = node->raw_out(0); + assert(!n->is_LoadBarrier(), "Sanity"); + // Calling recursively, visiting leafs first + fixup_uses_in_catch(phase, start_ctrl, n); + } + + // Now all successors are outside + // - Clone this node to both successors + int no_succs = node->outcnt(); + assert(!node->is_Store(), "Stores not expected here"); + + // In some very rare cases a load that doesn't need a barrier will end up here + // Treat it as a LoadP and the insertion of phis will be done correctly. + if (node->is_Load()) { + assert(node->as_Load()->barrier_data() == 0, "Sanity"); + call_catch_cleanup_one(phase, node->as_Load(), phase->get_ctrl(node)); + } else { + for (DUIterator_Fast jmax, i = node->fast_outs(jmax); i < jmax; i++) { + Node* use = node->fast_out(i); + Node* clone = node->clone(); + assert(clone->outcnt() == 0, ""); + + assert(use->find_edge(node) != -1, "check"); + phase->igvn().rehash_node_delayed(use); + use->replace_edge(node, clone); + + Node* new_ctrl; + if (use->is_block_start()) { + new_ctrl = use; + } else if (use->is_CFG()) { + new_ctrl = use->in(0); + assert (new_ctrl != NULL, ""); + } else { + new_ctrl = phase->get_ctrl(use); + } + + phase->set_ctrl(clone, new_ctrl); + + if (phase->C->directive()->ZTraceLoadBarriersOption) tty->print_cr(" Clone op %i as %i to control %i", node->_idx, clone->_idx, new_ctrl->_idx); + phase->igvn().register_new_node_with_optimizer(clone); + --i, --jmax; + } + assert(node->outcnt() == 0, "must be empty now"); + + // Node node is dead. + phase->igvn().remove_dead_node(node); + } + return true; // unwind - return if a use was processed +} + +// Clone a load to a specific catch_proj +static Node* clone_load_to_catchproj(PhaseIdealLoop* phase, Node* load, Node* catch_proj) { + Node* cloned_load = load->clone(); + cloned_load->set_req(0, catch_proj); // set explicit control + phase->set_ctrl(cloned_load, catch_proj); // update + if (phase->C->directive()->ZTraceLoadBarriersOption) tty->print_cr(" Clone LOAD %i as %i to control %i", load->_idx, cloned_load->_idx, catch_proj->_idx); + phase->igvn().register_new_node_with_optimizer(cloned_load); + return cloned_load; +} + +static Node* get_dominating_region(PhaseIdealLoop* phase, Node* node, Node* stop) { + Node* region = node; + while (!region->isa_Region()) { + Node *up = phase->idom(region); + assert(up != region, "Must not loop"); + assert(up != stop, "Must not find original control"); + region = up; + } + return region; +} + +// Clone this load to each catch block +static void call_catch_cleanup_one(PhaseIdealLoop* phase, LoadNode* load, Node* ctrl) { + bool trace = phase->C->directive()->ZTraceLoadBarriersOption; + phase->igvn().set_delay_transform(true); + + // Verify pre conditions + assert(ctrl->isa_Proj() && ctrl->in(0)->isa_Call(), "Must be a call proj"); + assert(ctrl->raw_out(0)->isa_Catch(), "Must be a catch"); + + if (ctrl->raw_out(0)->isa_Catch()->outcnt() == 1) { + if (trace) tty->print_cr("Cleaning up catch: Skipping load %i, call with single catch", load->_idx); + return; + } + + // Process the loads successor nodes - if any is between + // the call and the catch blocks, they need to be cloned to. + // This is done recursively + int outcnt = load->outcnt(); + uint index = 0; + for (int i = 0; i < outcnt; i++) { + if (index < load->outcnt()) { + Node *n = load->raw_out(index); + assert(!n->is_LoadBarrier(), "Sanity"); + if (!fixup_uses_in_catch(phase, ctrl, n)) { + // if no successor was cloned, progress to next out. + index++; + } + } + } + + // Now all the loads uses has been cloned down + // Only thing left is to clone the loads, but they must end up + // first in the catch blocks. + + // We clone the loads oo the catch blocks only when needed. + // An array is used to map the catch blocks to each lazily cloned load. + // In that way no extra unnecessary loads are cloned. + + // Any use dominated by original block must have an phi and a region added + + Node* catch_node = ctrl->raw_out(0); + int number_of_catch_projs = catch_node->outcnt(); + Node** proj_to_load_mapping = NEW_RESOURCE_ARRAY(Node*, number_of_catch_projs); + Copy::zero_to_bytes(proj_to_load_mapping, sizeof(Node*) * number_of_catch_projs); + + // The phi_map is used to keep track of where phis have already been inserted + int phi_map_len = phase->C->unique(); + Node** phi_map = NEW_RESOURCE_ARRAY(Node*, phi_map_len); + Copy::zero_to_bytes(phi_map, sizeof(Node*) * phi_map_len); + + for (unsigned int i = 0; i < load->outcnt(); i++) { + Node* load_use_control = NULL; + Node* load_use = load->raw_out(i); + + if (phase->has_ctrl(load_use)) { + load_use_control = phase->get_ctrl(load_use); + } else { + load_use_control = load_use->in(0); + } + assert(load_use_control != NULL, "sanity"); + if (trace) tty->print_cr(" Handling use: %i, with control: %i", load_use->_idx, load_use_control->_idx); + + // Some times the loads use is a phi. For them we need to determine from which catch block + // the use is defined. + bool load_use_is_phi = false; + unsigned int load_use_phi_index = 0; + Node* phi_ctrl = NULL; + if (load_use->is_Phi()) { + // Find phi input that matches load + for (unsigned int u = 1; u < load_use->req(); u++) { + if (load_use->in(u) == load) { + load_use_is_phi = true; + load_use_phi_index = u; + assert(load_use->in(0)->is_Region(), "Region or broken"); + phi_ctrl = load_use->in(0)->in(u); + assert(phi_ctrl->is_CFG(), "check"); + assert(phi_ctrl != load, "check"); + break; + } + } + assert(load_use_is_phi, "must find"); + assert(load_use_phi_index > 0, "sanity"); + } + + // For each load use, see which catch projs dominates, create load clone lazily and reconnect + bool found_dominating_catchproj = false; + for (int c = 0; c < number_of_catch_projs; c++) { + Node* catchproj = catch_node->raw_out(c); + assert(catchproj != NULL && catchproj->isa_CatchProj(), "Sanity"); + + if (!phase->is_dominator(catchproj, load_use_control)) { + if (load_use_is_phi && phase->is_dominator(catchproj, phi_ctrl)) { + // The loads use is local to the catchproj. + // fall out and replace load with catch-local load clone. + } else { + continue; + } + } + assert(!found_dominating_catchproj, "Max one should match"); + + // Clone loads to catch projs + Node* load_clone = proj_to_load_mapping[c]; + if (load_clone == NULL) { + load_clone = clone_load_to_catchproj(phase, load, catchproj); + proj_to_load_mapping[c] = load_clone; + } + phase->igvn().rehash_node_delayed(load_use); + + if (load_use_is_phi) { + // phis are special - the load is defined from a specific control flow + load_use->set_req(load_use_phi_index, load_clone); + } else { + // Multipe edges can be replaced at once - on calls for example + load_use->replace_edge(load, load_clone); + } + --i; // more than one edge can have been removed, but the next is in later iterations + + // We could break the for-loop after finding a dominating match. + // But keep iterating to catch any bad idom early. + found_dominating_catchproj = true; + } + + // We found no single catchproj that dominated the use - The use is at a point after + // where control flow from multiple catch projs have merged. We will have to create + // phi nodes before the use and tie the output from the cloned loads together. It + // can be a single phi or a number of chained phis, depending on control flow + if (!found_dominating_catchproj) { + + // Use phi-control if use is a phi + if (load_use_is_phi) { + load_use_control = phi_ctrl; + } + assert(phase->is_dominator(ctrl, load_use_control), "Common use but no dominator"); + + // Clone a load on all paths + for (int c = 0; c < number_of_catch_projs; c++) { + Node* catchproj = catch_node->raw_out(c); + Node* load_clone = proj_to_load_mapping[c]; + if (load_clone == NULL) { + load_clone = clone_load_to_catchproj(phase, load, catchproj); + proj_to_load_mapping[c] = load_clone; + } + } + + // Move up dominator tree from use until dom front is reached + Node* next_region = get_dominating_region(phase, load_use_control, ctrl); + while (phase->idom(next_region) != catch_node) { + next_region = phase->idom(next_region); + if (trace) tty->print_cr("Moving up idom to region ctrl %i", next_region->_idx); + } + assert(phase->is_dominator(catch_node, next_region), "Sanity"); + + // Create or reuse phi node that collect all cloned loads and feed it to the use. + Node* test_phi = phi_map[next_region->_idx]; + if ((test_phi != NULL) && test_phi->is_Phi()) { + // Reuse an already created phi + if (trace) tty->print_cr(" Using cached Phi %i on load_use %i", test_phi->_idx, load_use->_idx); + phase->igvn().rehash_node_delayed(load_use); + load_use->replace_edge(load, test_phi); + // Now this use is done + } else { + // Otherwise we need to create one or more phis + PhiNode* next_phi = new PhiNode(next_region, load->type()); + phi_map[next_region->_idx] = next_phi; // cache new phi + phase->igvn().rehash_node_delayed(load_use); + load_use->replace_edge(load, next_phi); + + int dominators_of_region = 0; + do { + // New phi, connect to region and add all loads as in. + Node* region = next_region; + assert(region->isa_Region() && region->req() > 2, "Catch dead region nodes"); + PhiNode* new_phi = next_phi; + + if (trace) tty->print_cr("Created Phi %i on load %i with control %i", new_phi->_idx, load->_idx, region->_idx); + + // Need to add all cloned loads to the phi, taking care that the right path is matched + dominators_of_region = 0; // reset for new region + for (unsigned int reg_i = 1; reg_i < region->req(); reg_i++) { + Node* region_pred = region->in(reg_i); + assert(region_pred->is_CFG(), "check"); + bool pred_has_dominator = false; + for (int c = 0; c < number_of_catch_projs; c++) { + Node* catchproj = catch_node->raw_out(c); + if (phase->is_dominator(catchproj, region_pred)) { + new_phi->set_req(reg_i, proj_to_load_mapping[c]); + if (trace) tty->print_cr(" - Phi in(%i) set to load %i", reg_i, proj_to_load_mapping[c]->_idx); + pred_has_dominator = true; + dominators_of_region++; + break; + } + } + + // Sometimes we need to chain several phis. + if (!pred_has_dominator) { + assert(dominators_of_region <= 1, "More than one region can't require extra phi"); + if (trace) tty->print_cr(" - Region %i pred %i not dominated by catch proj", region->_idx, region_pred->_idx); + // Continue search on on this region_pred + // - walk up to next region + // - create a new phi and connect to first new_phi + next_region = get_dominating_region(phase, region_pred, ctrl); + + // Lookup if there already is a phi, create a new otherwise + Node* test_phi = phi_map[next_region->_idx]; + if ((test_phi != NULL) && test_phi->is_Phi()) { + next_phi = test_phi->isa_Phi(); + dominators_of_region++; // record that a match was found and that we are done + if (trace) tty->print_cr(" Using cached phi Phi %i on control %i", next_phi->_idx, next_region->_idx); + } else { + next_phi = new PhiNode(next_region, load->type()); + phi_map[next_region->_idx] = next_phi; + } + new_phi->set_req(reg_i, next_phi); + } + } + + new_phi->set_req(0, region); + phase->igvn().register_new_node_with_optimizer(new_phi); + phase->set_ctrl(new_phi, region); + + assert(dominators_of_region != 0, "Must have found one this iteration"); + } while (dominators_of_region == 1); + } + --i; + } + } // end of loop over uses + + assert(load->outcnt() == 0, "All uses should be handled"); + phase->igvn().remove_dead_node(load); + phase->C->print_method(PHASE_CALL_CATCH_CLEANUP, 4, load->_idx); + + // Now we should be home + phase->igvn().set_delay_transform(false); +} + +// Sort out the loads that are between a call ant its catch blocks +static void process_catch_cleanup_candidate(PhaseIdealLoop* phase, LoadNode* load) { + bool trace = phase->C->directive()->ZTraceLoadBarriersOption; + + Node* ctrl = phase->get_ctrl(load); + if (!ctrl->is_Proj() || (ctrl->in(0) == NULL) || !ctrl->in(0)->isa_Call()) { + return; + } + + Node* catch_node = ctrl->isa_Proj()->raw_out(0); + if (catch_node->is_Catch()) { + if (catch_node->outcnt() > 1) { + call_catch_cleanup_one(phase, load, ctrl); + } else { + if (trace) tty->print_cr("Call catch cleanup with only one catch: load %i ", load->_idx); + } + } +} + +void ZBarrierSetC2::barrier_insertion_phase(Compile* C, PhaseIterGVN& igvn) const { + PhaseIdealLoop::optimize(igvn, LoopOptsZBarrierInsertion); + if (C->failing()) return; +} + +bool ZBarrierSetC2::optimize_loops(PhaseIdealLoop* phase, LoopOptsMode mode, VectorSet& visited, Node_Stack& nstack, Node_List& worklist) const { + + if (mode == LoopOptsZBarrierInsertion) { + // First make sure all loads between call and catch are moved to the catch block + clean_catch_blocks(phase); + + // Then expand barriers on all loads + insert_load_barriers(phase); + + // Handle all Unsafe that need barriers. + insert_barriers_on_unsafe(phase); + + phase->C->clear_major_progress(); + return true; + } else { + return false; + } +} + +static bool can_simplify_cas(LoadStoreNode* node) { + if (node->isa_LoadStoreConditional()) { + Node *expected_in = node->as_LoadStoreConditional()->in(LoadStoreConditionalNode::ExpectedIn); + return (expected_in->get_ptr_type() == TypePtr::NULL_PTR); + } else { + return false; + } +} + +static void insert_barrier_before_unsafe(PhaseIdealLoop* phase, LoadStoreNode* old_node) { + + Compile *C = phase->C; + PhaseIterGVN &igvn = phase->igvn(); + LoadStoreNode* zclone = NULL; + bool is_weak = false; + + Node *in_ctrl = old_node->in(MemNode::Control); + Node *in_mem = old_node->in(MemNode::Memory); + Node *in_adr = old_node->in(MemNode::Address); + Node *in_val = old_node->in(MemNode::ValueIn); + const TypePtr *adr_type = old_node->adr_type(); + const TypePtr* load_type = TypeOopPtr::BOTTOM; // The type for the load we are adding + + switch (old_node->Opcode()) { + case Op_CompareAndExchangeP: { + zclone = new ZCompareAndExchangePNode(in_ctrl, in_mem, in_adr, in_val, old_node->in(LoadStoreConditionalNode::ExpectedIn), + adr_type, old_node->get_ptr_type(), ((CompareAndExchangeNode*)old_node)->order()); + load_type = old_node->bottom_type()->is_ptr(); + break; + } + case Op_WeakCompareAndSwapP: { + if (can_simplify_cas(old_node)) { + break; + } + is_weak = true; + zclone = new ZWeakCompareAndSwapPNode(in_ctrl, in_mem, in_adr, in_val, old_node->in(LoadStoreConditionalNode::ExpectedIn), + ((CompareAndSwapNode*)old_node)->order()); + adr_type = TypePtr::BOTTOM; + break; + } + case Op_CompareAndSwapP: { + if (can_simplify_cas(old_node)) { + break; + } + zclone = new ZCompareAndSwapPNode(in_ctrl, in_mem, in_adr, in_val, old_node->in(LoadStoreConditionalNode::ExpectedIn), + ((CompareAndSwapNode*)old_node)->order()); + adr_type = TypePtr::BOTTOM; + break; + } + case Op_GetAndSetP: { + zclone = new ZGetAndSetPNode(in_ctrl, in_mem, in_adr, in_val, old_node->adr_type(), old_node->get_ptr_type()); + load_type = old_node->bottom_type()->is_ptr(); + break; + } + } + if (zclone != NULL) { + igvn.register_new_node_with_optimizer(zclone, old_node); + + // Make load + LoadPNode *load = new LoadPNode(NULL, in_mem, in_adr, adr_type, load_type, MemNode::unordered, + LoadNode::DependsOnlyOnTest); + load_set_expanded_barrier(load); + igvn.register_new_node_with_optimizer(load); + igvn.replace_node(old_node, zclone); + + Node *barrier = new LoadBarrierNode(C, NULL, in_mem, load, in_adr, is_weak); + Node *barrier_val = new ProjNode(barrier, LoadBarrierNode::Oop); + Node *barrier_ctrl = new ProjNode(barrier, LoadBarrierNode::Control); + + igvn.register_new_node_with_optimizer(barrier); + igvn.register_new_node_with_optimizer(barrier_val); + igvn.register_new_node_with_optimizer(barrier_ctrl); + + // loop over all of in_ctrl usages and move to barrier_ctrl + for (DUIterator_Last imin, i = in_ctrl->last_outs(imin); i >= imin; --i) { + Node *use = in_ctrl->last_out(i); + uint l; + for (l = 0; use->in(l) != in_ctrl; l++) {} + igvn.replace_input_of(use, l, barrier_ctrl); + } + + load->set_req(MemNode::Control, in_ctrl); + barrier->set_req(LoadBarrierNode::Control, in_ctrl); + zclone->add_req(barrier_val); // add req as keep alive. + + C->print_method(PHASE_ADD_UNSAFE_BARRIER, 4, zclone->_idx); + } +} + +void ZBarrierSetC2::insert_barriers_on_unsafe(PhaseIdealLoop* phase) const { + Compile *C = phase->C; + PhaseIterGVN &igvn = phase->igvn(); + uint new_ids = C->unique(); + VectorSet visited(Thread::current()->resource_area()); + GrowableArray nodeStack(Thread::current()->resource_area(), 0, 0, NULL); + nodeStack.push(C->root()); + visited.test_set(C->root()->_idx); + + // Traverse all nodes, visit all unsafe ops that require a barrier + while (nodeStack.length() > 0) { + Node *n = nodeStack.pop(); + + bool is_old_node = (n->_idx < new_ids); // don't process nodes that were created during cleanup + if (is_old_node) { + if (n->is_LoadStore()) { + LoadStoreNode* lsn = n->as_LoadStore(); + if (lsn->has_barrier()) { + BasicType bt = lsn->in(MemNode::Address)->bottom_type()->basic_type(); + assert ((bt == T_OBJECT || bt == T_ARRAY), "Sanity test"); + insert_barrier_before_unsafe(phase, lsn); + } + } + } + for (uint i = 0; i < n->len(); i++) { + if (n->in(i)) { + if (!visited.test_set(n->in(i)->_idx)) { + nodeStack.push(n->in(i)); + } + } + } + } + + igvn.optimize(); + C->print_method(PHASE_ADD_UNSAFE_BARRIER, 2); +} + +// The purpose of ZBarrierSetC2::clean_catch_blocks is to prepare the IR for +// splicing in load barrier nodes. +// +// The problem is that we might have instructions between a call and its catch nodes. +// (This is usually handled in PhaseCFG:call_catch_cleanup, which clones mach nodes in +// already scheduled blocks.) We can't have loads that require barriers there, +// because we need to splice in new control flow, and that would violate the IR. +// +// clean_catch_blocks find all Loads that require a barrier and clone them and any +// dependent instructions to each use. The loads must be in the beginning of the catch block +// before any store. +// +// Sometimes the loads use will be at a place dominated by all catch blocks, then we need +// a load in each catch block, and a Phi at the dominated use. + +void ZBarrierSetC2::clean_catch_blocks(PhaseIdealLoop* phase) const { + + Compile *C = phase->C; + uint new_ids = C->unique(); + PhaseIterGVN &igvn = phase->igvn(); + VectorSet visited(Thread::current()->resource_area()); + GrowableArray nodeStack(Thread::current()->resource_area(), 0, 0, NULL); + nodeStack.push(C->root()); + visited.test_set(C->root()->_idx); + + // Traverse all nodes, visit all loads that require a barrier + while(nodeStack.length() > 0) { + Node *n = nodeStack.pop(); + + bool is_old_node = (n->_idx < new_ids); // don't process nodes that were created during cleanup + if (n->is_Load() && is_old_node) { + LoadNode* load = n->isa_Load(); + // only care about loads that will have a barrier + if (load_require_barrier(load)) { + process_catch_cleanup_candidate(phase, load); + } + } + + for (uint i = 0; i < n->len(); i++) { + if (n->in(i)) { + if (!visited.test_set(n->in(i)->_idx)) { + nodeStack.push(n->in(i)); + } + } + } + } + + C->print_method(PHASE_CALL_CATCH_CLEANUP, 2); +} + +class DomDepthCompareClosure : public CompareClosure { + PhaseIdealLoop* _phase; + +public: + DomDepthCompareClosure(PhaseIdealLoop* phase) : _phase(phase) { } + + int do_compare(LoadNode* const &n1, LoadNode* const &n2) { + int d1 = _phase->dom_depth(_phase->get_ctrl(n1)); + int d2 = _phase->dom_depth(_phase->get_ctrl(n2)); + if (d1 == d2) { + // Compare index if the depth is the same, ensures all entries are unique. + return n1->_idx - n2->_idx; + } else { + return d2 - d1; + } + } +}; + +// Traverse graph and add all loadPs to list, sorted by dom depth +void gather_loadnodes_sorted(PhaseIdealLoop* phase, GrowableArray* loadList) { + + VectorSet visited(Thread::current()->resource_area()); + GrowableArray nodeStack(Thread::current()->resource_area(), 0, 0, NULL); + DomDepthCompareClosure ddcc(phase); + + nodeStack.push(phase->C->root()); + while(nodeStack.length() > 0) { + Node *n = nodeStack.pop(); + if (visited.test(n->_idx)) { + continue; + } + + if (n->isa_Load()) { + LoadNode *load = n->as_Load(); + if (load_require_barrier(load)) { + assert(phase->get_ctrl(load) != NULL, "sanity"); + assert(phase->dom_depth(phase->get_ctrl(load)) != 0, "sanity"); + loadList->insert_sorted(&ddcc, load); + } + } + + visited.set(n->_idx); + for (uint i = 0; i < n->req(); i++) { + if (n->in(i)) { + if (!visited.test(n->in(i)->_idx)) { + nodeStack.push(n->in(i)); } } } } } -#endif +// Add LoadBarriers to all LoadPs +void ZBarrierSetC2::insert_load_barriers(PhaseIdealLoop* phase) const { + + bool trace = phase->C->directive()->ZTraceLoadBarriersOption; + GrowableArray loadList(Thread::current()->resource_area(), 0, 0, NULL); + gather_loadnodes_sorted(phase, &loadList); + + PhaseIterGVN &igvn = phase->igvn(); + int count = 0; + + for (GrowableArrayIterator loadIter = loadList.begin(); loadIter != loadList.end(); ++loadIter) { + LoadNode *load = *loadIter; -bool ZBarrierSetC2::escape_add_to_con_graph(ConnectionGraph* conn_graph, PhaseGVN* gvn, Unique_Node_List* delayed_worklist, Node* n, uint opcode) const { - switch (opcode) { - case Op_LoadBarrierSlowReg: - case Op_LoadBarrierWeakSlowReg: - conn_graph->add_objload_to_connection_graph(n, delayed_worklist); - return true; + if (load_has_expanded_barrier(load)) { + continue; + } - case Op_Proj: - if (n->as_Proj()->_con != LoadBarrierNode::Oop || !n->in(0)->is_LoadBarrier()) { - return false; + do { + // Insert a barrier on a loadP + // if another load is found that needs to be expanded first, retry on that one + LoadNode* result = insert_one_loadbarrier(phase, load, phase->get_ctrl(load)); + while (result != NULL) { + result = insert_one_loadbarrier(phase, result, phase->get_ctrl(result)); } - conn_graph->add_local_var_and_edge(n, PointsToNode::NoEscape, n->in(0)->in(LoadBarrierNode::Oop), delayed_worklist); - return true; + } while (!load_has_expanded_barrier(load)); } - return false; + phase->C->print_method(PHASE_INSERT_BARRIER, 2); +} + +void push_antidependent_stores(PhaseIdealLoop* phase, Node_Stack& nodestack, LoadNode* start_load) { + // push all stores on the same mem, that can_alias + // Any load found must be handled first + PhaseIterGVN &igvn = phase->igvn(); + int load_alias_idx = igvn.C->get_alias_index(start_load->adr_type()); + + Node *mem = start_load->in(1); + for (DUIterator_Fast imax, u = mem->fast_outs(imax); u < imax; u++) { + Node *mem_use = mem->fast_out(u); + + if (mem_use == start_load) continue; + if (!mem_use->is_Store()) continue; + if (!phase->has_ctrl(mem_use)) continue; + if (phase->get_ctrl(mem_use) != phase->get_ctrl(start_load)) continue; + + // add any aliasing store in this block + StoreNode *store = mem_use->isa_Store(); + const TypePtr *adr_type = store->adr_type(); + if (igvn.C->can_alias(adr_type, load_alias_idx)) { + nodestack.push(store, 0); + } + } +} + +LoadNode* ZBarrierSetC2::insert_one_loadbarrier(PhaseIdealLoop* phase, LoadNode* start_load, Node* ctrl) const { + bool trace = phase->C->directive()->ZTraceLoadBarriersOption; + PhaseIterGVN &igvn = phase->igvn(); + + // Check for other loadPs at the same loop depth that is reachable by a DFS + // - if found - return it. It needs to be inserted first + // - otherwise proceed and insert barrier + + VectorSet visited(Thread::current()->resource_area()); + Node_Stack nodestack(100); + + nodestack.push(start_load, 0); + push_antidependent_stores(phase, nodestack, start_load); + + while(!nodestack.is_empty()) { + Node* n = nodestack.node(); // peek + nodestack.pop(); + if (visited.test(n->_idx)) { + continue; + } + + if (n->is_Load() && n != start_load && load_require_barrier(n->as_Load()) && !load_has_expanded_barrier(n->as_Load())) { + // Found another load that needs a barrier in the same block. Must expand later loads first. + if (trace) tty->print_cr(" * Found LoadP %i on DFS", n->_idx); + return n->as_Load(); // return node that should be expanded first + } + + if (!phase->has_ctrl(n)) continue; + if (phase->get_ctrl(n) != phase->get_ctrl(start_load)) continue; + if (n->is_Phi()) continue; + + visited.set(n->_idx); + // push all children + for (DUIterator_Fast imax, ii = n->fast_outs(imax); ii < imax; ii++) { + Node* c = n->fast_out(ii); + if (c != NULL) { + nodestack.push(c, 0); + } + } + } + + insert_one_loadbarrier_inner(phase, start_load, ctrl, visited); + return NULL; } -bool ZBarrierSetC2::escape_add_final_edges(ConnectionGraph* conn_graph, PhaseGVN* gvn, Node* n, uint opcode) const { - switch (opcode) { - case Op_LoadBarrierSlowReg: - case Op_LoadBarrierWeakSlowReg: - if (gvn->type(n)->make_ptr() == NULL) { - return false; - } - conn_graph->add_local_var_and_edge(n, PointsToNode::NoEscape, n->in(MemNode::Address), NULL); - return true; +void ZBarrierSetC2::insert_one_loadbarrier_inner(PhaseIdealLoop* phase, LoadNode* load, Node* ctrl, VectorSet visited2) const { + PhaseIterGVN &igvn = phase->igvn(); + Compile* C = igvn.C; + bool trace = C->directive()->ZTraceLoadBarriersOption; + + // create barrier + Node* barrier = new LoadBarrierNode(C, NULL, load->in(LoadNode::Memory), NULL, load->in(LoadNode::Address), load_has_weak_barrier(load)); + Node* barrier_val = new ProjNode(barrier, LoadBarrierNode::Oop); + Node* barrier_ctrl = new ProjNode(barrier, LoadBarrierNode::Control); + + if (trace) tty->print_cr("Insert load %i with barrier: %i and ctrl : %i", load->_idx, barrier->_idx, ctrl->_idx); + + // Splice control + // - insert barrier control diamond between loads ctrl and ctrl successor on path to block end. + // - If control successor is a catch, step over to next. + Node* ctrl_succ = NULL; + for (DUIterator_Fast imax, j = ctrl->fast_outs(imax); j < imax; j++) { + Node* tmp = ctrl->fast_out(j); - case Op_Proj: - if (n->as_Proj()->_con != LoadBarrierNode::Oop || !n->in(0)->is_LoadBarrier()) { - return false; - } - conn_graph->add_local_var_and_edge(n, PointsToNode::NoEscape, n->in(0)->in(LoadBarrierNode::Oop), NULL); - return true; + // - CFG nodes is the ones we are going to splice (1 only!) + // - Phi nodes will continue to hang from the region node! + // - self loops should be skipped + if (tmp->is_Phi() || tmp == ctrl) { + continue; + } + + if (tmp->is_CFG()) { + assert(ctrl_succ == NULL, "There can be only one"); + ctrl_succ = tmp; + continue; + } + } + + // Now splice control + assert(ctrl_succ != load, "sanity"); + assert(ctrl_succ != NULL, "Broken IR"); + bool found = false; + for(uint k = 0; k < ctrl_succ->req(); k++) { + if (ctrl_succ->in(k) == ctrl) { + assert(!found, "sanity"); + if (trace) tty->print_cr(" Move CFG ctrl_succ %i to barrier_ctrl", ctrl_succ->_idx); + igvn.replace_input_of(ctrl_succ, k, barrier_ctrl); + found = true; + k--; + } } - return false; + // For all successors of ctrl - move all visited to become successors of barrier_ctrl instead + for (DUIterator_Fast imax, r = ctrl->fast_outs(imax); r < imax; r++) { + Node* tmp = ctrl->fast_out(r); + if (visited2.test(tmp->_idx) && (tmp != load)) { + if (trace) tty->print_cr(" Move ctrl_succ %i to barrier_ctrl", tmp->_idx); + igvn.replace_input_of(tmp, 0, barrier_ctrl); + --r; --imax; + } + } + + // Move the loads user to the barrier + for (DUIterator_Fast imax, i = load->fast_outs(imax); i < imax; i++) { + Node* u = load->fast_out(i); + if (u->isa_LoadBarrier()) { + continue; + } + + // find correct input - replace with iterator? + for(uint j = 0; j < u->req(); j++) { + if (u->in(j) == load) { + igvn.replace_input_of(u, j, barrier_val); + --i; --imax; // Adjust the iterator of the *outer* loop + break; // some nodes (calls) might have several uses from the same node + } + } + } + + // Connect barrier to load and control + barrier->set_req(LoadBarrierNode::Oop, load); + barrier->set_req(LoadBarrierNode::Control, ctrl); + + igvn.rehash_node_delayed(load); + igvn.register_new_node_with_optimizer(barrier); + igvn.register_new_node_with_optimizer(barrier_val); + igvn.register_new_node_with_optimizer(barrier_ctrl); + load_set_expanded_barrier(load); + + C->print_method(PHASE_INSERT_BARRIER, 3, load->_idx); } + +// The bad_mask in the ThreadLocalData shouldn't have an anti-dep-check. +// The bad_mask address if of type TypeRawPtr, but that will alias +// InitializeNodes until the type system is expanded. +bool ZBarrierSetC2::needs_anti_dependence_check(const Node* node) const { + MachNode* mnode = node->as_Mach(); + if (mnode != NULL) { + intptr_t offset = 0; + const TypePtr *adr_type2 = NULL; + const Node* base = mnode->get_base_and_disp(offset, adr_type2); + if ((base != NULL) && + (base->is_Mach() && base->as_Mach()->ideal_Opcode() == Op_ThreadLocal) && + (offset == in_bytes(ZThreadLocalData::address_bad_mask_offset()))) { + return false; + } + } + return true; +} diff -r ea43db53de91 -r ed12027517c0 src/hotspot/share/gc/z/c2/zBarrierSetC2.hpp --- a/src/hotspot/share/gc/z/c2/zBarrierSetC2.hpp Mon Jun 10 05:09:52 2019 +0200 +++ b/src/hotspot/share/gc/z/c2/zBarrierSetC2.hpp Thu Feb 14 14:54:05 2019 +0100 @@ -29,15 +29,33 @@ #include "opto/node.hpp" #include "utilities/growableArray.hpp" +class ZCompareAndSwapPNode : public CompareAndSwapPNode { +public: + ZCompareAndSwapPNode(Node* c, Node *mem, Node *adr, Node *val, Node *ex, MemNode::MemOrd mem_ord) : CompareAndSwapPNode(c, mem, adr, val, ex, mem_ord) { } + virtual int Opcode() const; +}; + +class ZWeakCompareAndSwapPNode : public WeakCompareAndSwapPNode { +public: + ZWeakCompareAndSwapPNode(Node* c, Node *mem, Node *adr, Node *val, Node *ex, MemNode::MemOrd mem_ord) : WeakCompareAndSwapPNode(c, mem, adr, val, ex, mem_ord) { } + virtual int Opcode() const; +}; + +class ZCompareAndExchangePNode : public CompareAndExchangePNode { +public: + ZCompareAndExchangePNode(Node* c, Node *mem, Node *adr, Node *val, Node *ex, const TypePtr* at, const Type* t, MemNode::MemOrd mem_ord) : CompareAndExchangePNode(c, mem, adr, val, ex, at, t, mem_ord) { } + virtual int Opcode() const; +}; + +class ZGetAndSetPNode : public GetAndSetPNode { +public: + ZGetAndSetPNode(Node* c, Node *mem, Node *adr, Node *val, const TypePtr* at, const Type* t) : GetAndSetPNode(c, mem, adr, val, at, t) { } + virtual int Opcode() const; +}; + class LoadBarrierNode : public MultiNode { private: bool _weak; // On strong or weak oop reference - bool _writeback; // Controls if the barrier writes the healed oop back to memory - // A swap on a memory location must never write back the healed oop - bool _oop_reload_allowed; // Controls if the barrier are allowed to reload the oop from memory - // before healing, otherwise both the oop and the address must be - // passed to the barrier from the oop - static bool is_dominator(PhaseIdealLoop* phase, bool linear_only, Node *d, Node *n); void push_dominated_barriers(PhaseIterGVN* igvn) const; @@ -57,9 +75,7 @@ Node* mem, Node* val, Node* adr, - bool weak, - bool writeback, - bool oop_reload_allowed); + bool weak); virtual int Opcode() const; virtual uint size_of() const; @@ -86,17 +102,11 @@ bool is_weak() const { return _weak; } - - bool is_writeback() const { - return _writeback; - } - - bool oop_reload_allowed() const { - return _oop_reload_allowed; - } }; class LoadBarrierSlowRegNode : public LoadPNode { +private: + bool _is_weak; public: LoadBarrierSlowRegNode(Node *c, Node *mem, @@ -104,8 +114,9 @@ const TypePtr *at, const TypePtr* t, MemOrd mo, + bool weak = false, ControlDependency control_dependency = DependsOnlyOnTest) : - LoadPNode(c, mem, adr, at, t, mo, control_dependency) { + LoadPNode(c, mem, adr, at, t, mo, control_dependency), _is_weak(weak) { init_class_id(Class_LoadBarrierSlowReg); } @@ -118,30 +129,8 @@ } virtual int Opcode() const; -}; -class LoadBarrierWeakSlowRegNode : public LoadPNode { -public: - LoadBarrierWeakSlowRegNode(Node *c, - Node *mem, - Node *adr, - const TypePtr *at, - const TypePtr* t, - MemOrd mo, - ControlDependency control_dependency = DependsOnlyOnTest) : - LoadPNode(c, mem, adr, at, t, mo, control_dependency) { - init_class_id(Class_LoadBarrierWeakSlowReg); - } - - virtual const char * name() { - return "LoadBarrierWeakSlowRegNode"; - } - - virtual Node *Ideal(PhaseGVN *phase, bool can_reshape) { - return NULL; - } - - virtual int Opcode() const; + bool is_weak() { return _is_weak; } }; class ZBarrierSetC2State : public ResourceObj { @@ -157,15 +146,17 @@ LoadBarrierNode* load_barrier_node(int idx) const; }; +enum BarrierInfo { + NoBarrier = 0, + RequireBarrier = 1, + WeakBarrier = 3, // Inclusive with RequireBarrier + ExpandedBarrier = 4 +}; + class ZBarrierSetC2 : public BarrierSetC2 { private: ZBarrierSetC2State* state() const; - Node* make_cas_loadbarrier(C2AtomicParseAccess& access) const; - Node* make_cmpx_loadbarrier(C2AtomicParseAccess& access) const; - void expand_loadbarrier_basic(PhaseMacroExpand* phase, LoadBarrierNode *barrier) const; void expand_loadbarrier_node(PhaseMacroExpand* phase, LoadBarrierNode* barrier) const; - void expand_loadbarrier_optimized(PhaseMacroExpand* phase, LoadBarrierNode *barrier) const; - const TypeFunc* load_barrier_Type() const; #ifdef ASSERT void verify_gc_barriers(bool post_parse) const; @@ -186,41 +177,42 @@ const Type* val_type) const; public: - Node* load_barrier(GraphKit* kit, - Node* val, - Node* adr, - bool weak = false, - bool writeback = true, - bool oop_reload_allowed = true) const; + virtual void* create_barrier_state(Arena* comp_arena) const; - virtual void* create_barrier_state(Arena* comp_arena) const; virtual bool has_load_barriers() const { return true; } virtual bool is_gc_barrier_node(Node* node) const; - virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const { } - virtual void eliminate_useless_gc_barriers(Unique_Node_List &useful, Compile* C) const; - virtual void add_users_to_worklist(Unique_Node_List* worklist) const; - virtual void enqueue_useful_gc_barrier(PhaseIterGVN* igvn, Node* node) const; + virtual Node* step_over_gc_barrier(Node* c) const; + virtual void register_potential_barrier_node(Node* node) const; virtual void unregister_potential_barrier_node(Node* node) const; + virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const { } + virtual void enqueue_useful_gc_barrier(PhaseIterGVN* igvn, Node* node) const; + virtual void eliminate_useless_gc_barriers(Unique_Node_List &useful, Compile* C) const; + virtual bool array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type, bool is_clone, ArrayCopyPhase phase) const; - virtual Node* step_over_gc_barrier(Node* c) const; - // If the BarrierSetC2 state has kept barrier nodes in its compilation unit state to be - // expanded later, then now is the time to do so. - virtual bool expand_barriers(Compile* C, PhaseIterGVN& igvn) const; - static void find_dominating_barriers(PhaseIterGVN& igvn); - static void loop_optimize_gc_barrier(PhaseIdealLoop* phase, Node* node, bool last_round); - + virtual bool expand_barriers(Compile* C, PhaseIterGVN& igvn) const; virtual bool final_graph_reshaping(Compile* compile, Node* n, uint opcode) const; - virtual bool matcher_find_shared_visit(Matcher* matcher, Matcher::MStack& mstack, Node* n, uint opcode, bool& mem_op, int& mem_addr_idx) const; + virtual bool matcher_find_shared_post_visit(Matcher* matcher, Node* n, uint opcode) const; + virtual bool needs_anti_dependence_check(const Node* node) const; #ifdef ASSERT virtual void verify_gc_barriers(Compile* compile, CompilePhase phase) const; #endif - virtual bool escape_add_to_con_graph(ConnectionGraph* conn_graph, PhaseGVN* gvn, Unique_Node_List* delayed_worklist, Node* n, uint opcode) const; - virtual bool escape_add_final_edges(ConnectionGraph* conn_graph, PhaseGVN* gvn, Node* n, uint opcode) const; + // Load barrier insertion and expansion external + virtual void barrier_insertion_phase(Compile* C, PhaseIterGVN &igvn) const; + virtual bool optimize_loops(PhaseIdealLoop* phase, LoopOptsMode mode, VectorSet& visited, Node_Stack& nstack, Node_List& worklist) const; + virtual bool is_gc_specific_loop_opts_pass(LoopOptsMode mode) const { return (mode == LoopOptsZBarrierInsertion); } + +private: + // Load barrier insertion and expansion internal + void insert_barriers_on_unsafe(PhaseIdealLoop* phase) const; + void clean_catch_blocks(PhaseIdealLoop* phase) const; + void insert_load_barriers(PhaseIdealLoop* phase) const; + LoadNode* insert_one_loadbarrier(PhaseIdealLoop* phase, LoadNode* load, Node* ctrl) const; + void insert_one_loadbarrier_inner(PhaseIdealLoop* phase, LoadNode* load, Node* ctrl, VectorSet visited) const; }; #endif // SHARE_GC_Z_C2_ZBARRIERSETC2_HPP diff -r ea43db53de91 -r ed12027517c0 src/hotspot/share/gc/z/zHeap.cpp --- a/src/hotspot/share/gc/z/zHeap.cpp Mon Jun 10 05:09:52 2019 +0200 +++ b/src/hotspot/share/gc/z/zHeap.cpp Thu Feb 14 14:54:05 2019 +0100 @@ -328,46 +328,9 @@ _mark.flush_and_free(thread); } -class ZFixupPartialLoadsClosure : public ZRootsIteratorClosure { -public: - virtual void do_oop(oop* p) { - ZBarrier::mark_barrier_on_root_oop_field(p); - } - - virtual void do_oop(narrowOop* p) { - ShouldNotReachHere(); - } -}; - -class ZFixupPartialLoadsTask : public ZTask { -private: - ZThreadRootsIterator _thread_roots; - -public: - ZFixupPartialLoadsTask() : - ZTask("ZFixupPartialLoadsTask"), - _thread_roots() {} - - virtual void work() { - ZFixupPartialLoadsClosure cl; - _thread_roots.oops_do(&cl); - } -}; - -void ZHeap::fixup_partial_loads() { - ZFixupPartialLoadsTask task; - _workers.run_parallel(&task); -} - bool ZHeap::mark_end() { assert(SafepointSynchronize::is_at_safepoint(), "Should be at safepoint"); - // C2 can generate code where a safepoint poll is inserted - // between a load and the associated load barrier. To handle - // this case we need to rescan the thread stack here to make - // sure such oops are marked. - fixup_partial_loads(); - // Try end marking if (!_mark.end()) { // Marking not completed, continue concurrent mark diff -r ea43db53de91 -r ed12027517c0 src/hotspot/share/gc/z/z_globals.hpp --- a/src/hotspot/share/gc/z/z_globals.hpp Mon Jun 10 05:09:52 2019 +0200 +++ b/src/hotspot/share/gc/z/z_globals.hpp Thu Feb 14 14:54:05 2019 +0100 @@ -82,9 +82,6 @@ diagnostic(bool, ZVerifyForwarding, false, \ "Verify forwarding tables") \ \ - diagnostic(bool, ZOptimizeLoadBarriers, true, \ - "Apply load barrier optimizations") \ - \ develop(bool, ZVerifyLoadBarriers, false, \ "Verify that reference loads are followed by barriers") diff -r ea43db53de91 -r ed12027517c0 src/hotspot/share/opto/classes.hpp --- a/src/hotspot/share/opto/classes.hpp Mon Jun 10 05:09:52 2019 +0200 +++ b/src/hotspot/share/opto/classes.hpp Thu Feb 14 14:54:05 2019 +0100 @@ -196,7 +196,10 @@ #endif zgcmacro(LoadBarrier) zgcmacro(LoadBarrierSlowReg) -zgcmacro(LoadBarrierWeakSlowReg) +zgcmacro(ZCompareAndSwapP) +zgcmacro(ZWeakCompareAndSwapP) +zgcmacro(ZCompareAndExchangeP) +zgcmacro(ZGetAndSetP) macro(Lock) macro(Loop) macro(LoopLimit) diff -r ea43db53de91 -r ed12027517c0 src/hotspot/share/opto/compile.cpp --- a/src/hotspot/share/opto/compile.cpp Mon Jun 10 05:09:52 2019 +0200 +++ b/src/hotspot/share/opto/compile.cpp Thu Feb 14 14:54:05 2019 +0100 @@ -2211,8 +2211,8 @@ #endif + BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); #ifdef ASSERT - BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); bs->verify_gc_barriers(this, BarrierSetC2::BeforeOptimize); #endif @@ -2371,7 +2371,6 @@ igvn = ccp; igvn.optimize(); } - print_method(PHASE_ITER_GVN2, 2); if (failing()) return; @@ -2382,12 +2381,6 @@ return; } -#if INCLUDE_ZGC - if (UseZGC) { - ZBarrierSetC2::find_dominating_barriers(igvn); - } -#endif - if (failing()) return; // Ensure that major progress is now clear @@ -2407,28 +2400,33 @@ } #ifdef ASSERT - BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); - bs->verify_gc_barriers(this, BarrierSetC2::BeforeExpand); + bs->verify_gc_barriers(this, BarrierSetC2::BeforeLateInsertion); +#endif + + bs->barrier_insertion_phase(C, igvn); + if (failing()) return; + +#ifdef ASSERT + bs->verify_gc_barriers(this, BarrierSetC2::BeforeMacroExpand); #endif { TracePhase tp("macroExpand", &timers[_t_macroExpand]); PhaseMacroExpand mex(igvn); - print_method(PHASE_BEFORE_MACRO_EXPANSION, 2); if (mex.expand_macro_nodes()) { assert(failing(), "must bail out w/ explicit message"); return; } + print_method(PHASE_MACRO_EXPANSION, 2); } { TracePhase tp("barrierExpand", &timers[_t_barrierExpand]); - print_method(PHASE_BEFORE_BARRIER_EXPAND, 2); - BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); if (bs->expand_barriers(this, igvn)) { assert(failing(), "must bail out w/ explicit message"); return; } + print_method(PHASE_BARRIER_EXPANSION, 2); } if (opaque4_count() > 0) { @@ -2824,7 +2822,7 @@ MemBarNode* mb = n->as_MemBar(); if (mb->trailing_store() || mb->trailing_load_store()) { assert(mb->leading_membar()->trailing_membar() == mb, "bad membar pair"); - Node* mem = mb->in(MemBarNode::Precedent); + Node* mem = BarrierSet::barrier_set()->barrier_set_c2()->step_over_gc_barrier(mb->in(MemBarNode::Precedent)); assert((mb->trailing_store() && mem->is_Store() && mem->as_Store()->is_release()) || (mb->trailing_load_store() && mem->is_LoadStore()), "missing mem op"); } else if (mb->leading()) { diff -r ea43db53de91 -r ed12027517c0 src/hotspot/share/opto/compile.hpp --- a/src/hotspot/share/opto/compile.hpp Mon Jun 10 05:09:52 2019 +0200 +++ b/src/hotspot/share/opto/compile.hpp Thu Feb 14 14:54:05 2019 +0100 @@ -52,6 +52,7 @@ class CallGenerator; class CloneMap; class ConnectionGraph; +class IdealGraphPrinter; class InlineTree; class Int_Array; class LoadBarrierNode; @@ -95,9 +96,9 @@ LoopOptsNone, LoopOptsShenandoahExpand, LoopOptsShenandoahPostExpand, + LoopOptsZBarrierInsertion, LoopOptsSkipSplitIf, - LoopOptsVerify, - LoopOptsLastRound + LoopOptsVerify }; typedef unsigned int node_idx_t; @@ -658,6 +659,7 @@ void set_do_cleanup(bool z) { _do_cleanup = z; } int do_cleanup() const { return _do_cleanup; } void set_major_progress() { _major_progress++; } + void restore_major_progress(int progress) { _major_progress += progress; } void clear_major_progress() { _major_progress = 0; } int max_inline_size() const { return _max_inline_size; } void set_freq_inline_size(int n) { _freq_inline_size = n; } @@ -747,7 +749,15 @@ C->_latest_stage_start_counter.stamp(); } - void print_method(CompilerPhaseType cpt, int level = 1) { + bool should_print(int level = 1) { +#ifndef PRODUCT + return (_printer && _printer->should_print(level)); +#else + return false; +#endif + } + + void print_method(CompilerPhaseType cpt, int level = 1, int idx = 0) { EventCompilerPhase event; if (event.should_commit()) { event.set_starttime(C->_latest_stage_start_counter); @@ -757,10 +767,15 @@ event.commit(); } - #ifndef PRODUCT - if (_printer && _printer->should_print(level)) { - _printer->print_method(CompilerPhaseTypeHelper::to_string(cpt), level); + if (should_print(level)) { + char output[1024]; + if (idx != 0) { + sprintf(output, "%s:%d", CompilerPhaseTypeHelper::to_string(cpt), idx); + } else { + sprintf(output, "%s", CompilerPhaseTypeHelper::to_string(cpt)); + } + _printer->print_method(output, level); } #endif C->_latest_stage_start_counter.stamp(); diff -r ea43db53de91 -r ed12027517c0 src/hotspot/share/opto/idealGraphPrinter.cpp --- a/src/hotspot/share/opto/idealGraphPrinter.cpp Mon Jun 10 05:09:52 2019 +0200 +++ b/src/hotspot/share/opto/idealGraphPrinter.cpp Thu Feb 14 14:54:05 2019 +0100 @@ -350,14 +350,6 @@ _xml->flush(); } -// Print indent -void IdealGraphPrinter::print_indent() { - tty->print_cr("printing indent %d", _depth); - for (int i = 0; i < _depth; i++) { - _xml->print("%s", INDENT); - } -} - bool IdealGraphPrinter::traverse_outs() { return _traverse_outs; } @@ -663,14 +655,16 @@ } } -void IdealGraphPrinter::print_method(const char *name, int level, bool clear_nodes) { - print(name, (Node *)C->root(), level, clear_nodes); +void IdealGraphPrinter::print_method(const char *name, int level) { + if (should_print(level)) { + print(name, (Node *) C->root()); + } } // Print current ideal graph -void IdealGraphPrinter::print(const char *name, Node *node, int level, bool clear_nodes) { +void IdealGraphPrinter::print(const char *name, Node *node) { - if (!_current_method || !_should_send_method || !should_print(level)) return; + if (!_current_method || !_should_send_method) return; // Warning, unsafe cast? _chaitin = (PhaseChaitin *)C->regalloc(); diff -r ea43db53de91 -r ed12027517c0 src/hotspot/share/opto/idealGraphPrinter.hpp --- a/src/hotspot/share/opto/idealGraphPrinter.hpp Mon Jun 10 05:09:52 2019 +0200 +++ b/src/hotspot/share/opto/idealGraphPrinter.hpp Thu Feb 14 14:54:05 2019 +0100 @@ -81,11 +81,7 @@ static const char *METHOD_SHORT_NAME_PROPERTY; static const char *ASSEMBLY_ELEMENT; - elapsedTimer _walk_time; - elapsedTimer _output_time; - elapsedTimer _build_blocks_time; - - static int _file_count; + static int _file_count; networkStream *_stream; xmlStream *_xml; outputStream *_output; @@ -97,10 +93,6 @@ bool _traverse_outs; Compile *C; - static void pre_node(Node* node, void *env); - static void post_node(Node* node, void *env); - - void print_indent(); void print_method(ciMethod *method, int bci, InlineTree *tree); void print_inline_tree(InlineTree *tree); void visit_node(Node *n, bool edges, VectorSet* temp_set); @@ -116,7 +108,6 @@ void tail(const char *name); void head(const char *name); void text(const char *s); - intptr_t get_node_id(Node *n); IdealGraphPrinter(); ~IdealGraphPrinter(); @@ -130,9 +121,8 @@ void print_inlining(); void begin_method(); void end_method(); - void print_method(const char *name, int level=1, bool clear_nodes = false); - void print(const char *name, Node *root, int level=1, bool clear_nodes = false); - void print_xml(const char *name); + void print_method(const char *name, int level = 0); + void print(const char *name, Node *root); bool should_print(int level); void set_compile(Compile* compile) {C = compile; } }; diff -r ea43db53de91 -r ed12027517c0 src/hotspot/share/opto/lcm.cpp --- a/src/hotspot/share/opto/lcm.cpp Mon Jun 10 05:09:52 2019 +0200 +++ b/src/hotspot/share/opto/lcm.cpp Thu Feb 14 14:54:05 2019 +0100 @@ -171,7 +171,6 @@ case Op_LoadL: case Op_LoadP: case Op_LoadBarrierSlowReg: - case Op_LoadBarrierWeakSlowReg: case Op_LoadN: case Op_LoadS: case Op_LoadKlass: diff -r ea43db53de91 -r ed12027517c0 src/hotspot/share/opto/loopnode.cpp --- a/src/hotspot/share/opto/loopnode.cpp Mon Jun 10 05:09:52 2019 +0200 +++ b/src/hotspot/share/opto/loopnode.cpp Thu Feb 14 14:54:05 2019 +0100 @@ -978,7 +978,7 @@ wq.push(u); bool found_sfpt = false; for (uint next = 0; next < wq.size() && !found_sfpt; next++) { - Node *n = wq.at(next); + Node* n = wq.at(next); for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax && !found_sfpt; i++) { Node* u = n->fast_out(i); if (u == sfpt) { @@ -992,6 +992,19 @@ assert(found_sfpt, "no node in loop that's not input to safepoint"); } } + + if (UseZGC && !inner_out->in(0)->is_CountedLoopEnd()) { + // In some very special cases there can be a load that has no other uses than the + // counted loop safepoint. Then its loadbarrier will be placed between the inner + // loop exit and the safepoint. This is very rare + + Node* ifnode = inner_out->in(1)->in(0); + // Region->IfTrue->If == Region->Iffalse->If + if (ifnode == inner_out->in(2)->in(0)) { + inner_out = ifnode->in(0); + } + } + CountedLoopEndNode* cle = inner_out->in(0)->as_CountedLoopEnd(); assert(cle == inner->loopexit_or_null(), "mismatch"); bool has_skeleton = outer_le->in(1)->bottom_type()->singleton() && outer_le->in(1)->bottom_type()->is_int()->get_con() == 0; @@ -2761,7 +2774,7 @@ // Create a PhaseLoop. Build the ideal Loop tree. Map each Ideal Node to // its corresponding LoopNode. If 'optimize' is true, do some loop cleanups. void PhaseIdealLoop::build_and_optimize(LoopOptsMode mode) { - bool do_split_ifs = (mode == LoopOptsDefault || mode == LoopOptsLastRound); + bool do_split_ifs = (mode == LoopOptsDefault); bool skip_loop_opts = (mode == LoopOptsNone); int old_progress = C->major_progress(); @@ -2921,9 +2934,7 @@ build_loop_late( visited, worklist, nstack ); if (_verify_only) { - // restore major progress flag - for (int i = 0; i < old_progress; i++) - C->set_major_progress(); + C->restore_major_progress(old_progress); assert(C->unique() == unique, "verification mode made Nodes? ? ?"); assert(_igvn._worklist.size() == orig_worklist_size, "shouldn't push anything"); return; @@ -2967,9 +2978,7 @@ if (skip_loop_opts) { // restore major progress flag - for (int i = 0; i < old_progress; i++) { - C->set_major_progress(); - } + C->restore_major_progress(old_progress); // Cleanup any modified bits _igvn.optimize(); @@ -3018,11 +3027,8 @@ // that require basic-block info (like cloning through Phi's) if( SplitIfBlocks && do_split_ifs ) { visited.Clear(); - split_if_with_blocks( visited, nstack, mode == LoopOptsLastRound ); + split_if_with_blocks( visited, nstack); NOT_PRODUCT( if( VerifyLoopOptimizations ) verify(); ); - if (mode == LoopOptsLastRound) { - C->set_major_progress(); - } } if (!C->major_progress() && do_expensive_nodes && process_expensive_nodes()) { @@ -3157,8 +3163,7 @@ _ltree_root->verify_tree(loop_verify._ltree_root, NULL); // Reset major-progress. It was cleared by creating a verify version of // PhaseIdealLoop. - for( int i=0; iset_major_progress(); + C->restore_major_progress(old_progress); } //------------------------------verify_compare--------------------------------- @@ -4288,7 +4293,6 @@ case Op_LoadS: case Op_LoadP: case Op_LoadBarrierSlowReg: - case Op_LoadBarrierWeakSlowReg: case Op_LoadN: case Op_LoadRange: case Op_LoadD_unaligned: diff -r ea43db53de91 -r ed12027517c0 src/hotspot/share/opto/loopnode.hpp --- a/src/hotspot/share/opto/loopnode.hpp Mon Jun 10 05:09:52 2019 +0200 +++ b/src/hotspot/share/opto/loopnode.hpp Thu Feb 14 14:54:05 2019 +0100 @@ -824,6 +824,7 @@ // pull such a subsumed block out of the array, we write back the final // correct block. Node *get_ctrl( Node *i ) { + assert(has_node(i), ""); Node *n = get_ctrl_no_update(i); _nodes.map( i->_idx, (Node*)((intptr_t)n + 1) ); @@ -1306,9 +1307,9 @@ // Check for aggressive application of 'split-if' optimization, // using basic block level info. - void split_if_with_blocks ( VectorSet &visited, Node_Stack &nstack, bool last_round ); + void split_if_with_blocks ( VectorSet &visited, Node_Stack &nstack); Node *split_if_with_blocks_pre ( Node *n ); - void split_if_with_blocks_post( Node *n, bool last_round ); + void split_if_with_blocks_post( Node *n ); Node *has_local_phi_input( Node *n ); // Mark an IfNode as being dominated by a prior test, // without actually altering the CFG (and hence IDOM info). diff -r ea43db53de91 -r ed12027517c0 src/hotspot/share/opto/loopopts.cpp --- a/src/hotspot/share/opto/loopopts.cpp Mon Jun 10 05:09:52 2019 +0200 +++ b/src/hotspot/share/opto/loopopts.cpp Thu Feb 14 14:54:05 2019 +0100 @@ -1195,11 +1195,11 @@ // Do the real work in a non-recursive function. CFG hackery wants to be // in the post-order, so it can dirty the I-DOM info and not use the dirtied // info. -void PhaseIdealLoop::split_if_with_blocks_post(Node *n, bool last_round) { +void PhaseIdealLoop::split_if_with_blocks_post(Node *n) { // Cloning Cmp through Phi's involves the split-if transform. // FastLock is not used by an If - if (n->is_Cmp() && !n->is_FastLock() && !last_round) { + if (n->is_Cmp() && !n->is_FastLock()) { Node *n_ctrl = get_ctrl(n); // Determine if the Node has inputs from some local Phi. // Returns the block to clone thru. @@ -1451,18 +1451,12 @@ get_loop(get_ctrl(n)) == get_loop(get_ctrl(n->in(1))) ) { _igvn.replace_node( n, n->in(1) ); } - -#if INCLUDE_ZGC - if (UseZGC) { - ZBarrierSetC2::loop_optimize_gc_barrier(this, n, last_round); - } -#endif } //------------------------------split_if_with_blocks--------------------------- // Check for aggressive application of 'split-if' optimization, // using basic block level info. -void PhaseIdealLoop::split_if_with_blocks(VectorSet &visited, Node_Stack &nstack, bool last_round) { +void PhaseIdealLoop::split_if_with_blocks(VectorSet &visited, Node_Stack &nstack) { Node* root = C->root(); visited.set(root->_idx); // first, mark root as visited // Do pre-visit work for root @@ -1488,7 +1482,7 @@ // All of n's children have been processed, complete post-processing. if (cnt != 0 && !n->is_Con()) { assert(has_node(n), "no dead nodes"); - split_if_with_blocks_post(n, last_round); + split_if_with_blocks_post(n); } if (must_throttle_split_if()) { nstack.clear(); diff -r ea43db53de91 -r ed12027517c0 src/hotspot/share/opto/memnode.cpp --- a/src/hotspot/share/opto/memnode.cpp Mon Jun 10 05:09:52 2019 +0200 +++ b/src/hotspot/share/opto/memnode.cpp Thu Feb 14 14:54:05 2019 +0100 @@ -908,14 +908,6 @@ // a load node that reads from the source array so we may be able to // optimize out the ArrayCopy node later. Node* LoadNode::can_see_arraycopy_value(Node* st, PhaseGVN* phase) const { -#if INCLUDE_ZGC - if (UseZGC) { - if (bottom_type()->make_oopptr() != NULL) { - return NULL; - } - } -#endif - Node* ld_adr = in(MemNode::Address); intptr_t ld_off = 0; AllocateNode* ld_alloc = AllocateNode::Ideal_allocation(ld_adr, phase, ld_off); @@ -2811,7 +2803,8 @@ LoadStoreNode::LoadStoreNode( Node *c, Node *mem, Node *adr, Node *val, const TypePtr* at, const Type* rt, uint required ) : Node(required), _type(rt), - _adr_type(at) + _adr_type(at), + _has_barrier(false) { init_req(MemNode::Control, c ); init_req(MemNode::Memory , mem); @@ -3105,16 +3098,6 @@ return NULL; } -#if INCLUDE_ZGC - if (UseZGC) { - if (req() == (Precedent+1) && in(MemBarNode::Precedent)->in(0) != NULL && in(MemBarNode::Precedent)->in(0)->is_LoadBarrier()) { - Node* load_node = in(MemBarNode::Precedent)->in(0)->in(LoadBarrierNode::Oop); - set_req(MemBarNode::Precedent, load_node); - return this; - } - } -#endif - bool progress = false; // Eliminate volatile MemBars for scalar replaced objects. if (can_reshape && req() == (Precedent+1)) { diff -r ea43db53de91 -r ed12027517c0 src/hotspot/share/opto/memnode.hpp --- a/src/hotspot/share/opto/memnode.hpp Mon Jun 10 05:09:52 2019 +0200 +++ b/src/hotspot/share/opto/memnode.hpp Thu Feb 14 14:54:05 2019 +0100 @@ -164,6 +164,7 @@ Pinned, DependsOnlyOnTest }; + private: // LoadNode::hash() doesn't take the _control_dependency field // into account: If the graph already has a non-pinned LoadNode and @@ -182,6 +183,8 @@ // this field. const MemOrd _mo; + uint _barrier; // Bit field with barrier information + protected: virtual bool cmp(const Node &n) const; virtual uint size_of() const; // Size is bigger @@ -193,7 +196,7 @@ public: LoadNode(Node *c, Node *mem, Node *adr, const TypePtr* at, const Type *rt, MemOrd mo, ControlDependency control_dependency) - : MemNode(c,mem,adr,at), _control_dependency(control_dependency), _mo(mo), _type(rt) { + : MemNode(c,mem,adr,at), _control_dependency(control_dependency), _mo(mo), _barrier(0), _type(rt) { init_class_id(Class_Load); } inline bool is_unordered() const { return !is_acquire(); } @@ -262,6 +265,10 @@ Node* convert_to_unsigned_load(PhaseGVN& gvn); Node* convert_to_signed_load(PhaseGVN& gvn); + void copy_barrier_info(const Node* src) { _barrier = src->as_Load()->_barrier; } + uint barrier_data() { return _barrier; } + void set_barrier_data(uint barrier_data) { _barrier |= barrier_data; } + #ifndef PRODUCT virtual void dump_spec(outputStream *st) const; #endif @@ -810,6 +817,7 @@ private: const Type* const _type; // What kind of value is loaded? const TypePtr* _adr_type; // What kind of memory is being addressed? + bool _has_barrier; virtual uint size_of() const; // Size is bigger public: LoadStoreNode( Node *c, Node *mem, Node *adr, Node *val, const TypePtr* at, const Type* rt, uint required ); @@ -822,6 +830,8 @@ bool result_not_used() const; MemBarNode* trailing_membar() const; + void set_has_barrier() { _has_barrier = true; }; + bool has_barrier() const { return _has_barrier; }; }; class LoadStoreConditionalNode : public LoadStoreNode { diff -r ea43db53de91 -r ed12027517c0 src/hotspot/share/opto/node.cpp --- a/src/hotspot/share/opto/node.cpp Mon Jun 10 05:09:52 2019 +0200 +++ b/src/hotspot/share/opto/node.cpp Thu Feb 14 14:54:05 2019 +0100 @@ -546,6 +546,9 @@ if (n->is_SafePoint()) { n->as_SafePoint()->clone_replaced_nodes(); } + if (n->is_Load()) { + n->as_Load()->copy_barrier_info(this); + } return n; // Return the clone } @@ -564,7 +567,6 @@ } } - //------------------------------~Node------------------------------------------ // Fancy destructor; eagerly attempt to reclaim Node numberings and storage void Node::destruct() { @@ -1454,13 +1456,16 @@ //------------------------------needs_anti_dependence_check--------------------- // Nodes which use memory without consuming it, hence need antidependences. bool Node::needs_anti_dependence_check() const { - if( req() < 2 || (_flags & Flag_needs_anti_dependence_check) == 0 ) + if (req() < 2 || (_flags & Flag_needs_anti_dependence_check) == 0) { return false; - else - return in(1)->bottom_type()->has_memory(); + } + BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); + if (!bs->needs_anti_dependence_check(this)) { + return false; + } + return in(1)->bottom_type()->has_memory(); } - // Get an integer constant from a ConNode (or CastIINode). // Return a default value if there is no apparent constant here. const TypeInt* Node::find_int_type() const { diff -r ea43db53de91 -r ed12027517c0 src/hotspot/share/opto/node.hpp --- a/src/hotspot/share/opto/node.hpp Mon Jun 10 05:09:52 2019 +0200 +++ b/src/hotspot/share/opto/node.hpp Thu Feb 14 14:54:05 2019 +0100 @@ -83,8 +83,8 @@ class LoadNode; class LoadBarrierNode; class LoadBarrierSlowRegNode; -class LoadBarrierWeakSlowRegNode; class LoadStoreNode; +class LoadStoreConditionalNode; class LockNode; class LoopNode; class MachBranchNode; @@ -688,8 +688,7 @@ DEFINE_CLASS_ID(Mem, Node, 4) DEFINE_CLASS_ID(Load, Mem, 0) DEFINE_CLASS_ID(LoadVector, Load, 0) - DEFINE_CLASS_ID(LoadBarrierSlowReg, Load, 1) - DEFINE_CLASS_ID(LoadBarrierWeakSlowReg, Load, 2) + DEFINE_CLASS_ID(LoadBarrierSlowReg, Load, 1) DEFINE_CLASS_ID(Store, Mem, 1) DEFINE_CLASS_ID(StoreVector, Store, 0) DEFINE_CLASS_ID(LoadStore, Mem, 2) @@ -830,9 +829,9 @@ DEFINE_CLASS_QUERY(JumpProj) DEFINE_CLASS_QUERY(Load) DEFINE_CLASS_QUERY(LoadStore) + DEFINE_CLASS_QUERY(LoadStoreConditional) DEFINE_CLASS_QUERY(LoadBarrier) DEFINE_CLASS_QUERY(LoadBarrierSlowReg) - DEFINE_CLASS_QUERY(LoadBarrierWeakSlowReg) DEFINE_CLASS_QUERY(Lock) DEFINE_CLASS_QUERY(Loop) DEFINE_CLASS_QUERY(Mach) diff -r ea43db53de91 -r ed12027517c0 src/hotspot/share/opto/phaseX.cpp --- a/src/hotspot/share/opto/phaseX.cpp Mon Jun 10 05:09:52 2019 +0200 +++ b/src/hotspot/share/opto/phaseX.cpp Thu Feb 14 14:54:05 2019 +0100 @@ -1003,9 +1003,6 @@ n->is_Mem() ) add_users_to_worklist(n); } - - BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); - bs->add_users_to_worklist(&_worklist); } /** diff -r ea43db53de91 -r ed12027517c0 src/hotspot/share/opto/phasetype.hpp --- a/src/hotspot/share/opto/phasetype.hpp Mon Jun 10 05:09:52 2019 +0200 +++ b/src/hotspot/share/opto/phasetype.hpp Thu Feb 14 14:54:05 2019 +0100 @@ -52,8 +52,11 @@ PHASE_MATCHING, PHASE_INCREMENTAL_INLINE, PHASE_INCREMENTAL_BOXING_INLINE, - PHASE_BEFORE_BARRIER_EXPAND, - PHASE_BEFORE_MACRO_EXPANSION, + PHASE_CALL_CATCH_CLEANUP, + PHASE_INSERT_BARRIER, + PHASE_MACRO_EXPANSION, + PHASE_BARRIER_EXPANSION, + PHASE_ADD_UNSAFE_BARRIER, PHASE_END, PHASE_FAILURE, @@ -90,8 +93,11 @@ case PHASE_MATCHING: return "After matching"; case PHASE_INCREMENTAL_INLINE: return "Incremental Inline"; case PHASE_INCREMENTAL_BOXING_INLINE: return "Incremental Boxing Inline"; - case PHASE_BEFORE_BARRIER_EXPAND: return "Before Barrier Expand"; - case PHASE_BEFORE_MACRO_EXPANSION: return "Before macro expansion"; + case PHASE_CALL_CATCH_CLEANUP: return "Call catch cleanup"; + case PHASE_INSERT_BARRIER: return "Insert barrier"; + case PHASE_MACRO_EXPANSION: return "Macro expand"; + case PHASE_BARRIER_EXPANSION: return "Barrier expand"; + case PHASE_ADD_UNSAFE_BARRIER: return "Add barrier to unsafe op"; case PHASE_END: return "End"; case PHASE_FAILURE: return "Failure"; default: diff -r ea43db53de91 -r ed12027517c0 src/hotspot/share/opto/vectornode.cpp --- a/src/hotspot/share/opto/vectornode.cpp Mon Jun 10 05:09:52 2019 +0200 +++ b/src/hotspot/share/opto/vectornode.cpp Thu Feb 14 14:54:05 2019 +0100 @@ -298,7 +298,6 @@ case Op_LoadF: case Op_LoadD: case Op_LoadP: case Op_LoadN: case Op_LoadBarrierSlowReg: - case Op_LoadBarrierWeakSlowReg: *start = 0; *end = 0; // no vector operands break; diff -r ea43db53de91 -r ed12027517c0 src/hotspot/share/runtime/stackValue.cpp --- a/src/hotspot/share/runtime/stackValue.cpp Mon Jun 10 05:09:52 2019 +0200 +++ b/src/hotspot/share/runtime/stackValue.cpp Thu Feb 14 14:54:05 2019 +0100 @@ -133,16 +133,12 @@ } #endif // Deoptimization must make sure all oops have passed load barriers -#if INCLUDE_ZGC - if (UseZGC) { - val = ZBarrier::load_barrier_on_oop_field_preloaded((oop*)value_addr, val); - } -#endif #if INCLUDE_SHENANDOAHGC if (UseShenandoahGC) { val = ShenandoahBarrierSet::barrier_set()->load_reference_barrier(val); } #endif + assert(oopDesc::is_oop_or_null(val, false), "bad oop found"); Handle h(Thread::current(), val); // Wrap a handle around the oop return new StackValue(h); } diff -r ea43db53de91 -r ed12027517c0 src/hotspot/share/utilities/growableArray.hpp --- a/src/hotspot/share/utilities/growableArray.hpp Mon Jun 10 05:09:52 2019 +0200 +++ b/src/hotspot/share/utilities/growableArray.hpp Thu Feb 14 14:54:05 2019 +0100 @@ -152,6 +152,12 @@ template class GrowableArrayIterator; template class GrowableArrayFilterIterator; +template +class CompareClosure : public Closure { +public: + virtual int do_compare(const E&, const E&) = 0; +}; + template class GrowableArray : public GenericGrowableArray { friend class VMStructs; @@ -443,6 +449,37 @@ } return min; } + + E insert_sorted(CompareClosure* cc, const E& key) { + bool found; + int location = find_sorted(cc, key, found); + if (!found) { + insert_before(location, key); + } + return at(location); + } + + template + int find_sorted(CompareClosure* cc, const K& key, bool& found) { + found = false; + int min = 0; + int max = length() - 1; + + while (max >= min) { + int mid = (int)(((uint)max + min) / 2); + E value = at(mid); + int diff = cc->do_compare(key, value); + if (diff > 0) { + min = mid + 1; + } else if (diff < 0) { + max = mid - 1; + } else { + found = true; + return mid; + } + } + return min; + } }; // Global GrowableArray methods (one instance in the library per each 'E' type).