@@ -643,10 +643,14 @@
     } else {
       // Check if packs of the same memory type but
       // with a different alignment were created before.
       for (uint i = 0; i < align_to_refs.size(); i++) {
         MemNode* mr = align_to_refs.at(i)->as_Mem();
+        if (mr == mem_ref) {
+          // Skip when we are looking at the same memory operation.
+          continue;
+        }
         if (same_velt_type(mr, mem_ref) &&
             memory_alignment(mr, iv_adjustment) != 0)
           create_pack = false;
       }
     }
@@ -844,10 +848,31 @@
     return memops.at(max_idx)->as_Mem();
   }
   return NULL;
 }
 
+//------------------span_works_for_memory_size-----------------------------
+static bool span_works_for_memory_size(MemNode* mem, int span, int mem_size, int offset) {
+  bool span_matches_memory = false;
+  if ((mem_size == type2aelembytes(T_BYTE) || mem_size == type2aelembytes(T_SHORT))
+      && ABS(span) == type2aelembytes(T_INT)) {
+    // There is a mismatch between the span size and the memory size.
+    for (DUIterator_Fast jmax, j = mem->fast_outs(jmax); j < jmax; j++) {
+      Node* use = mem->fast_out(j);
+      if (!VectorNode::is_type_transition_to_int(use)) {
+        return false;
+      }
+    }
+    // If all uses transition to integer, it means that we can successfully align even with the mismatch.
+    return true;
+  }
+  else {
+    span_matches_memory = ABS(span) == mem_size;
+  }
+  return span_matches_memory && (ABS(offset) % mem_size) == 0;
+}
+
 //------------------------------ref_is_alignable---------------------------
 // Can the preloop align the reference to position zero in the vector?
 bool SuperWord::ref_is_alignable(SWPointer& p) {
   if (!p.has_iv()) {
     return true; // no induction variable
@@ -860,11 +885,11 @@
   int span = preloop_stride * p.scale_in_bytes();
   int mem_size = p.memory_size();
   int offset = p.offset_in_bytes();
   // Stride one accesses are alignable if offset is aligned to memory operation size.
   // Offset can be unaligned when UseUnalignedAccesses is used.
-  if (ABS(span) == mem_size && (ABS(offset) % mem_size) == 0) {
+  if (span_works_for_memory_size(p.mem(), span, mem_size, offset)) {
     return true;
   }
   // If the initial offset from start of the object is computable,
   // check if the pre-loop can align the final offset accordingly.
   //
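A note on the shape the new helper accepts: span_works_for_memory_size() tolerates an int-sized span over a one- or two-byte access, but only when every use of the load widens to int (VectorNode::is_type_transition_to_int), the canonical producer of that pattern being MulAddS2I. A minimal scalar sketch of a loop with that shape (illustrative only; the function and array names are hypothetical and not part of the patch):

// Illustrative C++ sketch, not HotSpot code: the kind of loop whose ideal
// graph contains MulAddS2I nodes. Each iteration reads two adjacent 2-byte
// shorts from each array but contributes a single 4-byte int to the sum.
int short_dot_product(const short* a, const short* b, int n) {
  int sum = 0;
  for (int i = 0; i < n; i += 2) {
    sum += a[i] * b[i] + a[i + 1] * b[i + 1];
  }
  return sum;
}

For this sketch the induction variable advances by two shorts per iteration, so span is 4 bytes (type2aelembytes(T_INT)) while mem_size is 2; the old early-accept test ABS(span) == mem_size could never fire for it, whereas the new helper accepts it provided the offset stays aligned to mem_size.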
@@ -913,19 +938,41 @@
       return (init_offset % vw) == 0;
     }
   }
   return false;
 }
+//---------------------------get_vw_bytes_special------------------------
+int SuperWord::get_vw_bytes_special(MemNode* s) {
+  // Get the vector width in bytes.
+  int vw = vector_width_in_bytes(s);
+
+  // Check for the special case of a MulAddS2I usage, where the short vectors are going to need to be combined.
+  BasicType btype = velt_basic_type(s);
+  if (type2aelembytes(btype) == 2) {
+    bool should_combine_adjacent = true;
+    for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) {
+      Node* user = s->fast_out(i);
+      if (!VectorNode::is_muladds2i(user)) {
+        should_combine_adjacent = false;
+      }
+    }
+    if (should_combine_adjacent) {
+      vw = MIN2(Matcher::max_vector_size(btype)*type2aelembytes(btype), vw * 2);
+    }
+  }
+
+  return vw;
+}
 
 //---------------------------get_iv_adjustment---------------------------
 // Calculate loop's iv adjustment for this memory ops.
 int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
   SWPointer align_to_ref_p(mem_ref, this, NULL, false);
   int offset = align_to_ref_p.offset_in_bytes();
   int scale = align_to_ref_p.scale_in_bytes();
   int elt_size = align_to_ref_p.memory_size();
-  int vw = vector_width_in_bytes(mem_ref);
+  int vw = get_vw_bytes_special(mem_ref);
   assert(vw > 1, "sanity");
   int iv_adjustment;
   if (scale != 0) {
     int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
     // At least one iteration is executed in pre-loop by default. As result
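Worked example for get_vw_bytes_special(), with assumed numbers: take a short-typed load (type2aelembytes(btype) == 2) whose every out-edge is a MulAddS2I and for which vector_width_in_bytes(s) returns 16. The special case then yields vw = MIN2(Matcher::max_vector_size(T_SHORT) * 2, 16 * 2); on a platform whose maximum short vector has at least 16 lanes this is 32 bytes, i.e. the width used for alignment and iv adjustment is doubled because, per the comment, adjacent short lanes are going to be combined into int lanes. If any use is not a MulAddS2I, vw stays at vector_width_in_bytes(s).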
@@ -2301,10 +2348,16 @@
         Node* mem = first->in(MemNode::Memory);
         Node* adr = low_adr->in(MemNode::Address);
         const TypePtr* atyp = n->adr_type();
         vn = StoreVectorNode::make(opc, ctl, mem, adr, atyp, val, vlen);
         vlen_in_bytes = vn->as_StoreVector()->memory_size();
+      } else if (VectorNode::is_muladds2i(n)) {
+        assert(n->req() == 5u, "MulAddS2I should have 4 operands.");
+        Node* in1 = vector_opd(p, 1);
+        Node* in2 = vector_opd(p, 2);
+        vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n));
+        vlen_in_bytes = vn->as_Vector()->length_in_bytes();
       } else if (n->req() == 3 && !is_cmov_pack(p)) {
         // Promote operands to vector
         Node* in1 = NULL;
         bool node_isa_reduction = n->is_reduction();
         if (node_isa_reduction) {
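Why the vector form gets only two operands when the scalar MulAddS2I has four value inputs (hence the req() == 5 assert): once the pack is vectorized, each pair of adjacent short lanes in the two packed operands collapses into one int lane, so the vectors built by vector_opd(p, 1) and vector_opd(p, 2) already carry both factors of every product pair; the change to vector_opd() further down packs the scalar input at opd_idx + 2 next to the one at opd_idx for exactly this reason. A scalar model of the per-lane behaviour of the resulting vector node, in the style of x86's pmaddwd (illustrative only; names are hypothetical, not HotSpot code):

// Illustrative C++ model: one int output lane is formed from two adjacent
// short lanes of each input vector.
void muladds2i_lanes(const short* in1, const short* in2, int* out, int int_lanes) {
  for (int k = 0; k < int_lanes; k++) {
    out[k] = in1[2 * k] * in2[2 * k] + in1[2 * k + 1] * in2[2 * k + 1];
  }
}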
@@ -2613,10 +2666,20 @@
         NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("Should already have been unpacked");})
         return NULL;
       }
       assert(opd_bt == in->bottom_type()->basic_type(), "all same type");
       pk->add_opd(in);
+      if (VectorNode::is_muladds2i(pi)) {
+        Node* in2 = pi->in(opd_idx + 2);
+        assert(my_pack(in2) == NULL, "Should already have been unpacked");
+        if (my_pack(in2) != NULL) {
+          NOT_PRODUCT(if (is_trace_loop_reverse() || TraceLoopOpts) { tty->print_cr("Should already have been unpacked"); })
+          return NULL;
+        }
+        assert(opd_bt == in2->bottom_type()->basic_type(), "all same type");
+        pk->add_opd(in2);
+      }
     }
     _igvn.register_new_node_with_optimizer(pk);
     _phase->set_ctrl(pk, _phase->get_ctrl(opd));
 #ifdef ASSERT
     if (TraceNewVectors) {
@@ -2687,10 +2750,25 @@
   if (d_pk == NULL) {
     // check for scalar promotion
     Node* n = u_pk->at(0)->in(u_idx);
     for (uint i = 1; i < u_pk->size(); i++) {
       if (u_pk->at(i)->in(u_idx) != n) return false;
+    }
+    return true;
+  }
+  if (VectorNode::is_muladds2i(use)) {
+    // MulAddS2I takes shorts and produces ints - hence the special checks
+    // on alignment and size.
+    if (u_pk->size() * 2 != d_pk->size()) {
+      return false;
+    }
+    for (uint i = 0; i < MIN2(d_pk->size(), u_pk->size()); i++) {
+      Node* ui = u_pk->at(i);
+      Node* di = d_pk->at(i);
+      if (alignment(ui) != alignment(di) * 2) {
+        return false;
+      }
     }
     return true;
   }
   if (u_pk->size() != d_pk->size())
     return false;
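A concrete reading of the two MulAddS2I checks in is_vector_use(), with assumed pack sizes: suppose the def pack d_pk holds eight LoadS nodes at byte alignments 0, 2, 4, ..., 14 and the use pack u_pk holds four MulAddS2I nodes at alignments 0, 4, 8, 12. Then u_pk->size() * 2 == d_pk->size() (4 * 2 == 8), and for each i the pairing alignment(ui) == alignment(di) * 2 holds (0 == 0, 4 == 2*2, 8 == 4*2, 12 == 6*2), so the use is accepted here even though the general u_pk->size() != d_pk->size() test just below would have rejected it.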
@@ -3015,11 +3093,11 @@
   SWPointer p(s, this, NULL, false);
   if (!p.valid()) {
     NOT_PRODUCT(if(is_trace_alignment()) tty->print("SWPointer::memory_alignment: SWPointer p invalid, return bottom_align");)
     return bottom_align;
   }
-  int vw = vector_width_in_bytes(s);
+  int vw = get_vw_bytes_special(s);
   if (vw < 2) {
     NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SWPointer::memory_alignment: vector_width_in_bytes < 2, return bottom_align");)
     return bottom_align; // No vectors for this type
   }
   int offset = p.offset_in_bytes();