# HG changeset patch
# User mcberg
# Date 1434496236 25200
# Node ID 7bf7e00a1aac8d48a73343b31a73329d25f5e251
# Parent  7033a9f3e2f1175fa8beda9f0ed5f35fbf138d22
8080325: SuperWord loop unrolling analysis
Summary: Determine loop unroll factor based on supported vector sizes.
Reviewed-by: roland, kvn

diff -r 7033a9f3e2f1 -r 7bf7e00a1aac hotspot/src/cpu/x86/vm/c2_init_x86.cpp
--- a/hotspot/src/cpu/x86/vm/c2_init_x86.cpp	Tue Jun 09 12:09:03 2015 +0200
+++ b/hotspot/src/cpu/x86/vm/c2_init_x86.cpp	Tue Jun 16 16:10:36 2015 -0700
@@ -58,4 +58,6 @@
       OptoReg::invalidate(i);
     }
   }
+
+  SuperWordLoopUnrollAnalysis = true;
 }
diff -r 7033a9f3e2f1 -r 7bf7e00a1aac hotspot/src/share/vm/opto/c2_globals.hpp
--- a/hotspot/src/share/vm/opto/c2_globals.hpp	Tue Jun 09 12:09:03 2015 +0200
+++ b/hotspot/src/share/vm/opto/c2_globals.hpp	Tue Jun 16 16:10:36 2015 -0700
@@ -191,6 +191,13 @@
   product(intx, LoopMaxUnroll, 16,                                           \
           "Maximum number of unrolls for main loop")                        \
                                                                              \
+  product(bool, SuperWordLoopUnrollAnalysis, false,                          \
+          "Map number of unrolls for main loop via "                        \
+          "Superword Level Parallelism analysis")                           \
+                                                                             \
+  notproduct(bool, TraceSuperWordLoopUnrollAnalysis, false,                  \
+          "Trace what Superword Level Parallelism analysis applies")        \
+                                                                             \
   product(intx, LoopUnrollMin, 4,                                            \
           "Minimum number of unroll loop bodies before checking progress"   \
           "of rounds of unroll,optimize,..")                                \
diff -r 7033a9f3e2f1 -r 7bf7e00a1aac hotspot/src/share/vm/opto/loopTransform.cpp
--- a/hotspot/src/share/vm/opto/loopTransform.cpp	Tue Jun 09 12:09:03 2015 +0200
+++ b/hotspot/src/share/vm/opto/loopTransform.cpp	Tue Jun 16 16:10:36 2015 -0700
@@ -38,6 +38,7 @@
 #include "opto/rootnode.hpp"
 #include "opto/runtime.hpp"
 #include "opto/subnode.hpp"
+#include "opto/superword.hpp"
 #include "opto/vectornode.hpp"
 
 //------------------------------is_loop_exit-----------------------------------
@@ -640,7 +641,7 @@
 //------------------------------policy_unroll----------------------------------
 // Return TRUE or FALSE if the loop should be unrolled or not.  Unroll if
 // the loop is a CountedLoop and the body is small enough.
-bool IdealLoopTree::policy_unroll( PhaseIdealLoop *phase ) const {
+bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
 
   CountedLoopNode *cl = _head->as_CountedLoop();
   assert(cl->is_normal_loop() || cl->is_main_loop(), "");
@@ -652,6 +653,8 @@
   // After split at least one iteration will be executed in pre-loop.
   if (cl->trip_count() <= (uint)(cl->is_normal_loop() ? 2 : 1)) return false;
 
+  _local_loop_unroll_limit = LoopUnrollLimit;
+  _local_loop_unroll_factor = 4;
   int future_unroll_ct = cl->unrolled_count() * 2;
   if (future_unroll_ct > LoopMaxUnroll) return false;
 
@@ -747,8 +750,24 @@
     } // switch
   }
 
+  if (UseSuperWord) {
+    if (!cl->is_reduction_loop()) {
+      phase->mark_reductions(this);
+    }
+
+    // Only attempt slp analysis when user controls do not prohibit it
+    if (LoopMaxUnroll > _local_loop_unroll_factor) {
+      // Once policy_slp_analysis succeeds, mark the loop with the
+      // maximal unroll factor so that we minimize analysis passes
+      if ((future_unroll_ct > _local_loop_unroll_factor) ||
+          (body_size > (uint)_local_loop_unroll_limit)) {
+        policy_unroll_slp_analysis(cl, phase, future_unroll_ct);
+      }
+    }
+  }
+
   // Check for being too big
-  if (body_size > (uint)LoopUnrollLimit) {
+  if (body_size > (uint)_local_loop_unroll_limit) {
     if (xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true;
     // Normal case: loop too big
     return false;
@@ -758,6 +777,36 @@
   return true;
 }
 
+void IdealLoopTree::policy_unroll_slp_analysis(CountedLoopNode *cl, PhaseIdealLoop *phase, int future_unroll_ct) {
+  // Enable this functionality target by target as needed
+  if (SuperWordLoopUnrollAnalysis) {
+    if (!cl->has_passed_slp()) {
+      SuperWord sw(phase);
+      sw.transform_loop(this, false);
+
+      // If the loop is slp canonical analyze it
+      if (sw.early_return() == false) {
+        sw.unrolling_analysis(cl, _local_loop_unroll_factor);
+      }
+    }
+
+    int slp_max_unroll_factor = cl->slp_max_unroll();
+    if ((slp_max_unroll_factor > 4) &&
+        (slp_max_unroll_factor >= future_unroll_ct)) {
+      int new_limit = cl->node_count_before_unroll() * slp_max_unroll_factor;
+      if (new_limit > LoopUnrollLimit) {
+#ifndef PRODUCT
+        if (TraceSuperWordLoopUnrollAnalysis) {
+          tty->print_cr("slp analysis is applying unroll limit %d, the original limit was %d",
+            new_limit, _local_loop_unroll_limit);
+        }
+#endif
+        _local_loop_unroll_limit = new_limit;
+      }
+    }
+  }
+}
+
 //------------------------------policy_align-----------------------------------
 // Return TRUE or FALSE if the loop should be cache-line aligned.  Gather the
 // expression that does the alignment.  Note that only one array base can be
@@ -1611,6 +1660,7 @@
           // iff the uses conform
           if (ok) {
             def_node->add_flag(Node::Flag_is_reduction);
+            loop_head->mark_has_reductions();
           }
         }
       }
@@ -2517,7 +2567,6 @@
     // and we'd rather unroll the post-RCE'd loop SO... do not unroll if
     // peeling.
     if (should_unroll && !should_peel) {
-      phase->mark_reductions(this);
       phase->do_unroll(this, old_new, true);
     }
 
diff -r 7033a9f3e2f1 -r 7bf7e00a1aac hotspot/src/share/vm/opto/loopnode.cpp
--- a/hotspot/src/share/vm/opto/loopnode.cpp	Tue Jun 09 12:09:03 2015 +0200
+++ b/hotspot/src/share/vm/opto/loopnode.cpp	Tue Jun 16 16:10:36 2015 -0700
@@ -2408,7 +2408,7 @@
       for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
         IdealLoopTree* lpt = iter.current();
         if (lpt->is_counted()) {
-          sw.transform_loop(lpt);
+          sw.transform_loop(lpt, true);
         }
       }
     }
diff -r 7033a9f3e2f1 -r 7bf7e00a1aac hotspot/src/share/vm/opto/loopnode.hpp
--- a/hotspot/src/share/vm/opto/loopnode.hpp	Tue Jun 09 12:09:03 2015 +0200
+++ b/hotspot/src/share/vm/opto/loopnode.hpp	Tue Jun 16 16:10:36 2015 -0700
@@ -62,7 +62,9 @@
          HasExactTripCount=8,
          InnerLoop=16,
          PartialPeelLoop=32,
-         PartialPeelFailed=64 };
+         PartialPeelFailed=64,
+         HasReductions=128,
+         PassedSlpAnalysis=256 };
   char _unswitch_count;
   enum { _unswitch_max=3 };
 
@@ -77,6 +79,8 @@
   void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; }
   int partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; }
   void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }
+  void mark_has_reductions() { _loop_flags |= HasReductions; }
+  void mark_passed_slp() { _loop_flags |= PassedSlpAnalysis; }
 
   int unswitch_max() { return _unswitch_max; }
   int unswitch_count() { return _unswitch_count; }
@@ -155,11 +159,15 @@
   // unroll,optimize,unroll,optimize,... is making progress
   int _node_count_before_unroll;
 
+  // If slp analysis is performed we record the maximum
+  // vector mapped unroll factor here
+  int _slp_maximum_unroll_factor;
+
 public:
   CountedLoopNode( Node *entry, Node *backedge )
     : LoopNode(entry, backedge), _main_idx(0), _trip_count(max_juint),
       _profile_trip_cnt(COUNT_UNKNOWN), _unrolled_count_log2(0),
-      _node_count_before_unroll(0) {
+      _node_count_before_unroll(0), _slp_maximum_unroll_factor(0) {
     init_class_id(Class_CountedLoop);
     // Initialize _trip_count to the largest possible value.
     // Will be reset (lower) if the loop's trip count is known.
@@ -199,10 +207,12 @@
 
   // A 'main' loop that is ONLY unrolled or peeled, never RCE'd or
   // Aligned, may be missing it's pre-loop.
-  int is_normal_loop() const { return (_loop_flags&PreMainPostFlagsMask) == Normal; }
-  int is_pre_loop   () const { return (_loop_flags&PreMainPostFlagsMask) == Pre;    }
-  int is_main_loop  () const { return (_loop_flags&PreMainPostFlagsMask) == Main;   }
-  int is_post_loop  () const { return (_loop_flags&PreMainPostFlagsMask) == Post;   }
+  int is_normal_loop   () const { return (_loop_flags&PreMainPostFlagsMask) == Normal; }
+  int is_pre_loop      () const { return (_loop_flags&PreMainPostFlagsMask) == Pre;    }
+  int is_main_loop     () const { return (_loop_flags&PreMainPostFlagsMask) == Main;   }
+  int is_post_loop     () const { return (_loop_flags&PreMainPostFlagsMask) == Post;   }
+  int is_reduction_loop() const { return (_loop_flags&HasReductions) == HasReductions; }
+  int has_passed_slp   () const { return (_loop_flags&PassedSlpAnalysis) == PassedSlpAnalysis; }
   int is_main_no_pre_loop() const { return _loop_flags & MainHasNoPreLoop; }
   void set_main_no_pre_loop() { _loop_flags |= MainHasNoPreLoop; }
 
@@ -232,8 +242,10 @@
   void double_unrolled_count() { _unrolled_count_log2++; }
   int  unrolled_count()        { return 1 << MIN2(_unrolled_count_log2, BitsPerInt-3); }
 
-  void set_node_count_before_unroll(int ct) { _node_count_before_unroll = ct; }
-  int  node_count_before_unroll()           { return _node_count_before_unroll; }
+  void set_node_count_before_unroll(int ct)  { _node_count_before_unroll = ct; }
+  int  node_count_before_unroll()            { return _node_count_before_unroll; }
+  void set_slp_max_unroll(int unroll_factor) { _slp_maximum_unroll_factor = unroll_factor; }
+  int  slp_max_unroll() const                { return _slp_maximum_unroll_factor; }
 
 #ifndef PRODUCT
   virtual void dump_spec(outputStream *st) const;
@@ -336,6 +348,8 @@
   Node *_tail;                  // Tail of loop
   inline Node *tail();          // Handle lazy update of _tail field
   PhaseIdealLoop* _phase;
+  int _local_loop_unroll_limit;
+  int _local_loop_unroll_factor;
 
   Node_List _body;              // Loop body for inner loops
 
@@ -356,7 +370,8 @@
       _safepts(NULL),
       _required_safept(NULL),
       _allow_optimizations(true),
-      _nest(0), _irreducible(0), _has_call(0), _has_sfpt(0), _rce_candidate(0)
+      _nest(0), _irreducible(0), _has_call(0), _has_sfpt(0), _rce_candidate(0),
+      _local_loop_unroll_limit(0), _local_loop_unroll_factor(0)
   { }
 
   // Is 'l' a member of 'this'?
@@ -444,7 +459,10 @@
 
   // Return TRUE or FALSE if the loop should be unrolled or not.  Unroll if
   // the loop is a CountedLoop and the body is small enough.
-  bool policy_unroll( PhaseIdealLoop *phase ) const;
+  bool policy_unroll(PhaseIdealLoop *phase);
+
+  // Loop analyses to map to a maximal superword unrolling for vectorization.
+  void policy_unroll_slp_analysis(CountedLoopNode *cl, PhaseIdealLoop *phase, int future_unroll_ct);
 
   // Return TRUE or FALSE if the loop should be range-check-eliminated.
   // Gather a list of IF tests that are dominated by iteration splitting;
diff -r 7033a9f3e2f1 -r 7bf7e00a1aac hotspot/src/share/vm/opto/superword.cpp
--- a/hotspot/src/share/vm/opto/superword.cpp	Tue Jun 09 12:09:03 2015 +0200
+++ b/hotspot/src/share/vm/opto/superword.cpp	Tue Jun 16 16:10:36 2015 -0700
@@ -68,6 +68,7 @@
   _bb(NULL),              // basic block
   _iv(NULL),              // induction var
   _race_possible(false),  // cases where SDMU is true
+  _early_return(true),    // analysis evaluations routine
   _num_work_vecs(0),      // amount of vector work we have
   _num_reductions(0),     // amount of reduction work we have
   _do_vector_loop(phase->C->do_vector_loop()),  // whether to do vectorization/simd style
@@ -78,7 +79,7 @@
 {}
 
 //------------------------------transform_loop---------------------------
-void SuperWord::transform_loop(IdealLoopTree* lpt) {
+void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
   assert(UseSuperWord, "should be");
   // Do vectors exist on this architecture?
if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return; @@ -113,8 +114,158 @@ // For now, define one block which is the entire loop body set_bb(cl); - assert(_packset.length() == 0, "packset must be empty"); - SLP_extract(); + if (do_optimization) { + assert(_packset.length() == 0, "packset must be empty"); + SLP_extract(); + } +} + +//------------------------------early unrolling analysis------------------------------ +void SuperWord::unrolling_analysis(CountedLoopNode *cl, int &local_loop_unroll_factor) { + bool is_slp = true; + ResourceMark rm; + size_t ignored_size = lpt()->_body.size(); + int *ignored_loop_nodes = NEW_RESOURCE_ARRAY(int, ignored_size); + Node_Stack nstack((int)ignored_size); + Node *cl_exit = cl->loopexit(); + + // First clear the entries + for (uint i = 0; i < lpt()->_body.size(); i++) { + ignored_loop_nodes[i] = -1; + } + + int max_vector = Matcher::max_vector_size(T_INT); + + // Process the loop, some/all of the stack entries will not be in order, ergo + // need to preprocess the ignored initial state before we process the loop + for (uint i = 0; i < lpt()->_body.size(); i++) { + Node* n = lpt()->_body.at(i); + if (n == cl->incr() || + n->is_reduction() || + n->is_AddP() || + n->is_Cmp() || + n->is_IfTrue() || + n->is_CountedLoop() || + (n == cl_exit)) { + ignored_loop_nodes[i] = n->_idx; + continue; + } + + if (n->is_If()) { + IfNode *iff = n->as_If(); + if (iff->_fcnt != COUNT_UNKNOWN && iff->_prob != PROB_UNKNOWN) { + if (lpt()->is_loop_exit(iff)) { + ignored_loop_nodes[i] = n->_idx; + continue; + } + } + } + + if (n->is_Phi() && (n->bottom_type() == Type::MEMORY)) { + Node* n_tail = n->in(LoopNode::LoopBackControl); + if (n_tail != n->in(LoopNode::EntryControl)) { + if (!n_tail->is_Mem()) { + is_slp = false; + break; + } + } + } + + // This must happen after check of phi/if + if (n->is_Phi() || n->is_If()) { + ignored_loop_nodes[i] = n->_idx; + continue; + } + + if (n->is_LoadStore() || n->is_MergeMem() || + (n->is_Proj() && 
!n->as_Proj()->is_CFG())) { + is_slp = false; + break; + } + + if (n->is_Mem()) { + Node* adr = n->in(MemNode::Address); + Node* n_ctrl = _phase->get_ctrl(adr); + + // save a queue of post process nodes + if (n_ctrl != NULL && lpt()->is_member(_phase->get_loop(n_ctrl))) { + MemNode* current = n->as_Mem(); + BasicType bt = current->memory_type(); + if (is_java_primitive(bt) == false) { + ignored_loop_nodes[i] = n->_idx; + continue; + } + + // Process the memory expression + int stack_idx = 0; + bool have_side_effects = true; + if (adr->is_AddP() == false) { + nstack.push(adr, stack_idx++); + } else { + // Mark the components of the memory operation in nstack + SWPointer p1(current, this, &nstack, true); + have_side_effects = p1.node_stack()->is_nonempty(); + } + + // Process the pointer stack + while (have_side_effects) { + Node* pointer_node = nstack.node(); + for (uint j = 0; j < lpt()->_body.size(); j++) { + Node* cur_node = lpt()->_body.at(j); + if (cur_node == pointer_node) { + ignored_loop_nodes[j] = cur_node->_idx; + break; + } + } + nstack.pop(); + have_side_effects = nstack.is_nonempty(); + } + } + } + } + + if (is_slp) { + // Now we try to find the maximum supported consistent vector which the machine + // description can use + for (uint i = 0; i < lpt()->_body.size(); i++) { + if (ignored_loop_nodes[i] != -1) continue; + + BasicType bt; + Node* n = lpt()->_body.at(i); + if (n->is_Store()) { + bt = n->as_Mem()->memory_type(); + } + else { + bt = n->bottom_type()->basic_type(); + } + + int cur_max_vector = Matcher::max_vector_size(bt); + + // If a max vector exists which is not larger than _local_loop_unroll_factor + // stop looking, we already have the max vector to map to. 
+ if (cur_max_vector <= local_loop_unroll_factor) { + is_slp = false; +#ifndef PRODUCT + if (TraceSuperWordLoopUnrollAnalysis) { + tty->print_cr("slp analysis fails: unroll limit equals max vector\n"); + } +#endif + break; + } + + // Map the maximal common vector + if (VectorNode::implemented(n->Opcode(), cur_max_vector, bt)) { + if (cur_max_vector < max_vector) { + max_vector = cur_max_vector; + } + } + } + if (is_slp) { + local_loop_unroll_factor = max_vector; + } + cl->mark_passed_slp(); + cl->set_slp_max_unroll(local_loop_unroll_factor); + } } //------------------------------SLP_extract--------------------------- @@ -268,12 +419,12 @@ best_iv_adjustment = iv_adjustment; } - SWPointer align_to_ref_p(mem_ref, this); + SWPointer align_to_ref_p(mem_ref, this, NULL, false); // Set alignment relative to "align_to_ref" for all related memory operations. for (int i = memops.size() - 1; i >= 0; i--) { MemNode* s = memops.at(i)->as_Mem(); if (isomorphic(s, mem_ref)) { - SWPointer p2(s, this); + SWPointer p2(s, this, NULL, false); if (p2.comparable(align_to_ref_p)) { int align = memory_alignment(s, iv_adjustment); set_alignment(s, align); @@ -294,7 +445,7 @@ // iterations in pre-loop will be not enough to align it. create_pack = false; } else { - SWPointer p2(best_align_to_mem_ref, this); + SWPointer p2(best_align_to_mem_ref, this, NULL, false); if (align_to_ref_p.invar() != p2.invar()) { // Do not vectorize memory accesses with different invariants // if unaligned memory accesses are not allowed. 
@@ -411,7 +562,7 @@
   // Count number of comparable memory ops
   for (uint i = 0; i < memops.size(); i++) {
     MemNode* s1 = memops.at(i)->as_Mem();
-    SWPointer p1(s1, this);
+    SWPointer p1(s1, this, NULL, false);
     // Discard if pre loop can't align this reference
     if (!ref_is_alignable(p1)) {
       *cmp_ct.adr_at(i) = 0;
@@ -420,7 +571,7 @@
     for (uint j = i+1; j < memops.size(); j++) {
       MemNode* s2 = memops.at(j)->as_Mem();
       if (isomorphic(s1, s2)) {
-        SWPointer p2(s2, this);
+        SWPointer p2(s2, this, NULL, false);
         if (p1.comparable(p2)) {
           (*cmp_ct.adr_at(i))++;
           (*cmp_ct.adr_at(j))++;
@@ -441,7 +592,7 @@
     if (s->is_Store()) {
       int vw = vector_width_in_bytes(s);
       assert(vw > 1, "sanity");
-      SWPointer p(s, this);
+      SWPointer p(s, this, NULL, false);
       if (cmp_ct.at(j) > max_ct ||
           cmp_ct.at(j) == max_ct &&
             (vw > max_vw ||
@@ -464,7 +615,7 @@
       if (s->is_Load()) {
         int vw = vector_width_in_bytes(s);
         assert(vw > 1, "sanity");
-        SWPointer p(s, this);
+        SWPointer p(s, this, NULL, false);
         if (cmp_ct.at(j) > max_ct ||
             cmp_ct.at(j) == max_ct &&
               (vw > max_vw ||
@@ -575,7 +726,7 @@
 //---------------------------get_iv_adjustment---------------------------
 // Calculate loop's iv adjustment for this memory ops.
 int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
-  SWPointer align_to_ref_p(mem_ref, this);
+  SWPointer align_to_ref_p(mem_ref, this, NULL, false);
   int offset = align_to_ref_p.offset_in_bytes();
   int scale  = align_to_ref_p.scale_in_bytes();
   int elt_size = align_to_ref_p.memory_size();
@@ -649,13 +800,13 @@
       if (_dg.dep(s1)->in_cnt() == 0) {
         _dg.make_edge(slice, s1);
       }
-      SWPointer p1(s1->as_Mem(), this);
+      SWPointer p1(s1->as_Mem(), this, NULL, false);
       bool sink_dependent = true;
       for (int k = j - 1; k >= 0; k--) {
         Node* s2 = _nlist.at(k);
         if (s1->is_Load() && s2->is_Load())
           continue;
-        SWPointer p2(s2->as_Mem(), this);
+        SWPointer p2(s2->as_Mem(), this, NULL, false);
 
         int cmp = p1.cmp(p2);
         if (SuperWordRTDepCheck &&
@@ -795,8 +946,8 @@
   if (_phase->C->get_alias_index(s1->as_Mem()->adr_type()) !=
       _phase->C->get_alias_index(s2->as_Mem()->adr_type()))
     return false;
-  SWPointer p1(s1->as_Mem(), this);
-  SWPointer p2(s2->as_Mem(), this);
+  SWPointer p1(s1->as_Mem(), this, NULL, false);
+  SWPointer p2(s2->as_Mem(), this, NULL, false);
   if (p1.base() != p2.base() || !p1.comparable(p2)) return false;
   int diff = p2.offset_in_bytes() - p1.offset_in_bytes();
   return diff == data_size(s1);
@@ -1615,13 +1766,13 @@
       if (n->is_Load()) {
         Node* ctl = n->in(MemNode::Control);
         Node* mem = first->in(MemNode::Memory);
-        SWPointer p1(n->as_Mem(), this);
+        SWPointer p1(n->as_Mem(), this, NULL, false);
         // Identify the memory dependency for the new loadVector node by
         // walking up through memory chain.
         // This is done to give flexibility to the new loadVector node so that
         // it can move above independent storeVector nodes.
         while (mem->is_StoreVector()) {
-          SWPointer p2(mem->as_Mem(), this);
+          SWPointer p2(mem->as_Mem(), this, NULL, false);
           int cmp = p1.cmp(p2);
           if (SWPointer::not_equal(cmp) || !SWPointer::comparable(cmp)) {
             mem = mem->in(MemNode::Memory);
@@ -2138,7 +2289,7 @@
 //------------------------------memory_alignment---------------------------
 // Alignment within a vector memory reference
 int SuperWord::memory_alignment(MemNode* s, int iv_adjust) {
-  SWPointer p(s, this);
+  SWPointer p(s, this, NULL, false);
   if (!p.valid()) {
     return bottom_align;
   }
@@ -2315,7 +2466,7 @@
   Node *orig_limit = pre_opaq->original_loop_limit();
   assert(orig_limit != NULL && _igvn.type(orig_limit) != Type::TOP, "");
 
-  SWPointer align_to_ref_p(align_to_ref, this);
+  SWPointer align_to_ref_p(align_to_ref, this, NULL, false);
   assert(align_to_ref_p.valid(), "sanity");
 
   // Given:
@@ -2489,6 +2640,7 @@
   _bb = NULL;
   _iv = NULL;
   _race_possible = 0;
+  _early_return = false;
   _num_work_vecs = 0;
   _num_reductions = 0;
 }
@@ -2559,9 +2711,11 @@
 //==============================SWPointer===========================
 
 //----------------------------SWPointer------------------------
-SWPointer::SWPointer(MemNode* mem, SuperWord* slp) :
+SWPointer::SWPointer(MemNode* mem, SuperWord* slp, Node_Stack *nstack, bool analyze_only) :
   _mem(mem), _slp(slp),  _base(NULL),  _adr(NULL),
-  _scale(0), _offset(0), _invar(NULL), _negate_invar(false) {
+  _scale(0), _offset(0), _invar(NULL), _negate_invar(false),
+  _nstack(nstack), _analyze_only(analyze_only),
+  _stack_idx(0) {
 
   Node* adr = mem->in(MemNode::Address);
   if (!adr->is_AddP()) {
@@ -2599,7 +2753,9 @@
 // the pattern match of an address expression.
SWPointer::SWPointer(SWPointer* p) : _mem(p->_mem), _slp(p->_slp), _base(NULL), _adr(NULL), - _scale(0), _offset(0), _invar(NULL), _negate_invar(false) {} + _scale(0), _offset(0), _invar(NULL), _negate_invar(false), + _nstack(p->_nstack), _analyze_only(p->_analyze_only), + _stack_idx(p->_stack_idx) {} //------------------------scaled_iv_plus_offset-------------------- // Match: k*iv + offset @@ -2642,6 +2798,9 @@ _scale = 1; return true; } + if (_analyze_only && (invariant(n) == false)) { + _nstack->push(n, _stack_idx++); + } int opc = n->Opcode(); if (opc == Op_MulI) { if (n->in(1) == iv() && n->in(2)->is_Con()) { @@ -2699,6 +2858,9 @@ return false; } if (_invar != NULL) return false; // already have an invariant + if (_analyze_only && (invariant(n) == false)) { + _nstack->push(n, _stack_idx++); + } if (opc == Op_AddI) { if (n->in(2)->is_Con() && invariant(n->in(1))) { _negate_invar = negate; diff -r 7033a9f3e2f1 -r 7bf7e00a1aac hotspot/src/share/vm/opto/superword.hpp --- a/hotspot/src/share/vm/opto/superword.hpp Tue Jun 09 12:09:03 2015 +0200 +++ b/hotspot/src/share/vm/opto/superword.hpp Tue Jun 16 16:10:36 2015 -0700 @@ -239,12 +239,15 @@ public: SuperWord(PhaseIdealLoop* phase); - void transform_loop(IdealLoopTree* lpt); + void transform_loop(IdealLoopTree* lpt, bool do_optimization); + + void unrolling_analysis(CountedLoopNode *cl, int &local_loop_unroll_factor); // Accessors for SWPointer PhaseIdealLoop* phase() { return _phase; } IdealLoopTree* lpt() { return _lpt; } PhiNode* iv() { return _iv; } + bool early_return() { return _early_return; } private: IdealLoopTree* _lpt; // Current loop tree node @@ -252,6 +255,7 @@ Node* _bb; // Current basic block PhiNode* _iv; // Induction var bool _race_possible; // In cases where SDMU is true + bool _early_return; // True if we do not initialize bool _do_vector_loop; // whether to do vectorization/simd style bool _vector_loop_debug; // provide more printing in debug mode int _num_work_vecs; // Number of non memory 
vector operations @@ -462,15 +466,18 @@ // Information about an address for dependence checking and vector alignment class SWPointer VALUE_OBJ_CLASS_SPEC { protected: - MemNode* _mem; // My memory reference node - SuperWord* _slp; // SuperWord class + MemNode* _mem; // My memory reference node + SuperWord* _slp; // SuperWord class - Node* _base; // NULL if unsafe nonheap reference - Node* _adr; // address pointer - jint _scale; // multiplier for iv (in bytes), 0 if no loop iv - jint _offset; // constant offset (in bytes) - Node* _invar; // invariant offset (in bytes), NULL if none - bool _negate_invar; // if true then use: (0 - _invar) + Node* _base; // NULL if unsafe nonheap reference + Node* _adr; // address pointer + jint _scale; // multiplier for iv (in bytes), 0 if no loop iv + jint _offset; // constant offset (in bytes) + Node* _invar; // invariant offset (in bytes), NULL if none + bool _negate_invar; // if true then use: (0 - _invar) + Node_Stack* _nstack; // stack used to record a swpointer trace of variants + bool _analyze_only; // Used in loop unrolling only for swpointer trace + uint _stack_idx; // Used in loop unrolling only for swpointer trace PhaseIdealLoop* phase() { return _slp->phase(); } IdealLoopTree* lpt() { return _slp->lpt(); } @@ -497,7 +504,7 @@ NotComparable = (Less | Greater | Equal) }; - SWPointer(MemNode* mem, SuperWord* slp); + SWPointer(MemNode* mem, SuperWord* slp, Node_Stack *nstack, bool analyze_only); // Following is used to create a temporary object during // the pattern match of an address expression. 
SWPointer(SWPointer* p); @@ -505,14 +512,15 @@ bool valid() { return _adr != NULL; } bool has_iv() { return _scale != 0; } - Node* base() { return _base; } - Node* adr() { return _adr; } - MemNode* mem() { return _mem; } - int scale_in_bytes() { return _scale; } - Node* invar() { return _invar; } - bool negate_invar() { return _negate_invar; } - int offset_in_bytes() { return _offset; } - int memory_size() { return _mem->memory_size(); } + Node* base() { return _base; } + Node* adr() { return _adr; } + MemNode* mem() { return _mem; } + int scale_in_bytes() { return _scale; } + Node* invar() { return _invar; } + bool negate_invar() { return _negate_invar; } + int offset_in_bytes() { return _offset; } + int memory_size() { return _mem->memory_size(); } + Node_Stack* node_stack() { return _nstack; } // Comparable? int cmp(SWPointer& q) {