src/hotspot/share/opto/superword.cpp
changeset 52992 4bb6e0871bf7
parent 51333 f6641fcf7b7e
child 53336 36ca868f266f
equal deleted inserted replaced
52991:9e28eff3d40f 52992:4bb6e0871bf7
   643         } else {
   643         } else {
   644           // Check if packs of the same memory type but
   644           // Check if packs of the same memory type but
   645           // with a different alignment were created before.
   645           // with a different alignment were created before.
   646           for (uint i = 0; i < align_to_refs.size(); i++) {
   646           for (uint i = 0; i < align_to_refs.size(); i++) {
   647             MemNode* mr = align_to_refs.at(i)->as_Mem();
   647             MemNode* mr = align_to_refs.at(i)->as_Mem();
       
   648             if (mr == mem_ref) {
       
   649               // Skip when we are looking at same memory operation.
       
   650               continue;
       
   651             }
   648             if (same_velt_type(mr, mem_ref) &&
   652             if (same_velt_type(mr, mem_ref) &&
   649                 memory_alignment(mr, iv_adjustment) != 0)
   653                 memory_alignment(mr, iv_adjustment) != 0)
   650               create_pack = false;
   654               create_pack = false;
   651           }
   655           }
   652         }
   656         }
   844     return memops.at(max_idx)->as_Mem();
   848     return memops.at(max_idx)->as_Mem();
   845   }
   849   }
   846   return NULL;
   850   return NULL;
   847 }
   851 }
   848 
   852 
       
   853 //------------------span_works_for_memory_size-----------------------------
       
   854 static bool span_works_for_memory_size(MemNode* mem, int span, int mem_size, int offset) {
       
   855   bool span_matches_memory = false;
       
   856   if ((mem_size == type2aelembytes(T_BYTE) || mem_size == type2aelembytes(T_SHORT))
       
   857     && ABS(span) == type2aelembytes(T_INT)) {
       
   858     // There is a mismatch on span size compared to memory.
       
   859     for (DUIterator_Fast jmax, j = mem->fast_outs(jmax); j < jmax; j++) {
       
   860       Node* use = mem->fast_out(j);
       
   861       if (!VectorNode::is_type_transition_to_int(use)) {
       
   862         return false;
       
   863       }
       
   864     }
       
   865     // If all uses transition to integer, it means that we can successfully align even on mismatch.
       
   866     return true;
       
   867   }
       
   868   else {
       
   869     span_matches_memory = ABS(span) == mem_size;
       
   870   }
       
   871   return span_matches_memory && (ABS(offset) % mem_size) == 0;
       
   872 }
       
   873 
   849 //------------------------------ref_is_alignable---------------------------
   874 //------------------------------ref_is_alignable---------------------------
   850 // Can the preloop align the reference to position zero in the vector?
   875 // Can the preloop align the reference to position zero in the vector?
   851 bool SuperWord::ref_is_alignable(SWPointer& p) {
   876 bool SuperWord::ref_is_alignable(SWPointer& p) {
   852   if (!p.has_iv()) {
   877   if (!p.has_iv()) {
   853     return true;   // no induction variable
   878     return true;   // no induction variable
   860   int span = preloop_stride * p.scale_in_bytes();
   885   int span = preloop_stride * p.scale_in_bytes();
   861   int mem_size = p.memory_size();
   886   int mem_size = p.memory_size();
   862   int offset   = p.offset_in_bytes();
   887   int offset   = p.offset_in_bytes();
   863   // Stride one accesses are alignable if offset is aligned to memory operation size.
   888   // Stride one accesses are alignable if offset is aligned to memory operation size.
   864   // Offset can be unaligned when UseUnalignedAccesses is used.
   889   // Offset can be unaligned when UseUnalignedAccesses is used.
   865   if (ABS(span) == mem_size && (ABS(offset) % mem_size) == 0) {
   890   if (span_works_for_memory_size(p.mem(), span, mem_size, offset)) {
   866     return true;
   891     return true;
   867   }
   892   }
   868   // If the initial offset from start of the object is computable,
   893   // If the initial offset from start of the object is computable,
   869   // check if the pre-loop can align the final offset accordingly.
   894   // check if the pre-loop can align the final offset accordingly.
   870   //
   895   //
   913       return (init_offset % vw) == 0;
   938       return (init_offset % vw) == 0;
   914     }
   939     }
   915   }
   940   }
   916   return false;
   941   return false;
   917 }
   942 }
       
   943 //---------------------------get_vw_bytes_special------------------------
       
   944 int SuperWord::get_vw_bytes_special(MemNode* s) {
       
   945   // Get the vector width in bytes.
       
   946   int vw = vector_width_in_bytes(s);
       
   947 
       
   948   // Check for special case where there is an MulAddS2I usage where short vectors are going to need combined.
       
   949   BasicType btype = velt_basic_type(s);
       
   950   if (type2aelembytes(btype) == 2) {
       
   951     bool should_combine_adjacent = true;
       
   952     for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) {
       
   953       Node* user = s->fast_out(i);
       
   954       if (!VectorNode::is_muladds2i(user)) {
       
   955         should_combine_adjacent = false;
       
   956       }
       
   957     }
       
   958     if (should_combine_adjacent) {
       
   959       vw = MIN2(Matcher::max_vector_size(btype)*type2aelembytes(btype), vw * 2);
       
   960     }
       
   961   }
       
   962 
       
   963   return vw;
       
   964 }
   918 
   965 
   919 //---------------------------get_iv_adjustment---------------------------
   966 //---------------------------get_iv_adjustment---------------------------
   920 // Calculate loop's iv adjustment for this memory ops.
   967 // Calculate loop's iv adjustment for this memory ops.
   921 int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
   968 int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
   922   SWPointer align_to_ref_p(mem_ref, this, NULL, false);
   969   SWPointer align_to_ref_p(mem_ref, this, NULL, false);
   923   int offset = align_to_ref_p.offset_in_bytes();
   970   int offset = align_to_ref_p.offset_in_bytes();
   924   int scale  = align_to_ref_p.scale_in_bytes();
   971   int scale  = align_to_ref_p.scale_in_bytes();
   925   int elt_size = align_to_ref_p.memory_size();
   972   int elt_size = align_to_ref_p.memory_size();
   926   int vw       = vector_width_in_bytes(mem_ref);
   973   int vw       = get_vw_bytes_special(mem_ref);
   927   assert(vw > 1, "sanity");
   974   assert(vw > 1, "sanity");
   928   int iv_adjustment;
   975   int iv_adjustment;
   929   if (scale != 0) {
   976   if (scale != 0) {
   930     int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
   977     int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
   931     // At least one iteration is executed in pre-loop by default. As result
   978     // At least one iteration is executed in pre-loop by default. As result
  2301         Node* mem = first->in(MemNode::Memory);
  2348         Node* mem = first->in(MemNode::Memory);
  2302         Node* adr = low_adr->in(MemNode::Address);
  2349         Node* adr = low_adr->in(MemNode::Address);
  2303         const TypePtr* atyp = n->adr_type();
  2350         const TypePtr* atyp = n->adr_type();
  2304         vn = StoreVectorNode::make(opc, ctl, mem, adr, atyp, val, vlen);
  2351         vn = StoreVectorNode::make(opc, ctl, mem, adr, atyp, val, vlen);
  2305         vlen_in_bytes = vn->as_StoreVector()->memory_size();
  2352         vlen_in_bytes = vn->as_StoreVector()->memory_size();
       
  2353       } else if (VectorNode::is_muladds2i(n)) {
       
  2354         assert(n->req() == 5u, "MulAddS2I should have 4 operands.");
       
  2355         Node* in1 = vector_opd(p, 1);
       
  2356         Node* in2 = vector_opd(p, 2);
       
  2357         vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n));
       
  2358         vlen_in_bytes = vn->as_Vector()->length_in_bytes();
  2306       } else if (n->req() == 3 && !is_cmov_pack(p)) {
  2359       } else if (n->req() == 3 && !is_cmov_pack(p)) {
  2307         // Promote operands to vector
  2360         // Promote operands to vector
  2308         Node* in1 = NULL;
  2361         Node* in1 = NULL;
  2309         bool node_isa_reduction = n->is_reduction();
  2362         bool node_isa_reduction = n->is_reduction();
  2310         if (node_isa_reduction) {
  2363         if (node_isa_reduction) {
  2613       NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("Should already have been unpacked");})
  2666       NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("Should already have been unpacked");})
  2614       return NULL;
  2667       return NULL;
  2615     }
  2668     }
  2616     assert(opd_bt == in->bottom_type()->basic_type(), "all same type");
  2669     assert(opd_bt == in->bottom_type()->basic_type(), "all same type");
  2617     pk->add_opd(in);
  2670     pk->add_opd(in);
       
  2671     if (VectorNode::is_muladds2i(pi)) {
       
  2672       Node* in2 = pi->in(opd_idx + 2);
       
  2673       assert(my_pack(in2) == NULL, "Should already have been unpacked");
       
  2674       if (my_pack(in2) != NULL) {
       
  2675         NOT_PRODUCT(if (is_trace_loop_reverse() || TraceLoopOpts) { tty->print_cr("Should already have been unpacked"); })
       
  2676           return NULL;
       
  2677       }
       
  2678       assert(opd_bt == in2->bottom_type()->basic_type(), "all same type");
       
  2679       pk->add_opd(in2);
       
  2680     }
  2618   }
  2681   }
  2619   _igvn.register_new_node_with_optimizer(pk);
  2682   _igvn.register_new_node_with_optimizer(pk);
  2620   _phase->set_ctrl(pk, _phase->get_ctrl(opd));
  2683   _phase->set_ctrl(pk, _phase->get_ctrl(opd));
  2621 #ifdef ASSERT
  2684 #ifdef ASSERT
  2622   if (TraceNewVectors) {
  2685   if (TraceNewVectors) {
  2687   if (d_pk == NULL) {
  2750   if (d_pk == NULL) {
  2688     // check for scalar promotion
  2751     // check for scalar promotion
  2689     Node* n = u_pk->at(0)->in(u_idx);
  2752     Node* n = u_pk->at(0)->in(u_idx);
  2690     for (uint i = 1; i < u_pk->size(); i++) {
  2753     for (uint i = 1; i < u_pk->size(); i++) {
  2691       if (u_pk->at(i)->in(u_idx) != n) return false;
  2754       if (u_pk->at(i)->in(u_idx) != n) return false;
       
  2755     }
       
  2756     return true;
       
  2757   }
       
  2758   if (VectorNode::is_muladds2i(use)) {
       
  2759     // MulAddS2I takes shorts and produces ints - hence the special checks
       
  2760     // on alignment and size.
       
  2761     if (u_pk->size() * 2 != d_pk->size()) {
       
  2762       return false;
       
  2763     }
       
  2764     for (uint i = 0; i < MIN2(d_pk->size(), u_pk->size()); i++) {
       
  2765       Node* ui = u_pk->at(i);
       
  2766       Node* di = d_pk->at(i);
       
  2767       if (alignment(ui) != alignment(di) * 2) {
       
  2768         return false;
       
  2769       }
  2692     }
  2770     }
  2693     return true;
  2771     return true;
  2694   }
  2772   }
  2695   if (u_pk->size() != d_pk->size())
  2773   if (u_pk->size() != d_pk->size())
  2696     return false;
  2774     return false;
  3015   SWPointer p(s, this, NULL, false);
  3093   SWPointer p(s, this, NULL, false);
  3016   if (!p.valid()) {
  3094   if (!p.valid()) {
  3017     NOT_PRODUCT(if(is_trace_alignment()) tty->print("SWPointer::memory_alignment: SWPointer p invalid, return bottom_align");)
  3095     NOT_PRODUCT(if(is_trace_alignment()) tty->print("SWPointer::memory_alignment: SWPointer p invalid, return bottom_align");)
  3018     return bottom_align;
  3096     return bottom_align;
  3019   }
  3097   }
  3020   int vw = vector_width_in_bytes(s);
  3098   int vw = get_vw_bytes_special(s);
  3021   if (vw < 2) {
  3099   if (vw < 2) {
  3022     NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SWPointer::memory_alignment: vector_width_in_bytes < 2, return bottom_align");)
  3100     NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SWPointer::memory_alignment: vector_width_in_bytes < 2, return bottom_align");)
  3023     return bottom_align; // No vectors for this type
  3101     return bottom_align; // No vectors for this type
  3024   }
  3102   }
  3025   int offset  = p.offset_in_bytes();
  3103   int offset  = p.offset_in_bytes();