274 |
274 |
275 // If a max vector exists which is not larger than _local_loop_unroll_factor |
275 // If a max vector exists which is not larger than _local_loop_unroll_factor |
276 // stop looking, we already have the max vector to map to. |
276 // stop looking, we already have the max vector to map to. |
277 if (cur_max_vector < local_loop_unroll_factor) { |
277 if (cur_max_vector < local_loop_unroll_factor) { |
278 is_slp = false; |
278 is_slp = false; |
279 NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("slp analysis fails: unroll limit greater than max vector\n")); |
279 if (TraceSuperWordLoopUnrollAnalysis) { |
|
280 tty->print_cr("slp analysis fails: unroll limit greater than max vector\n"); |
|
281 } |
280 break; |
282 break; |
281 } |
283 } |
282 |
284 |
283 // Map the maximal common vector |
285 // Map the maximal common vector |
284 if (VectorNode::implemented(n->Opcode(), cur_max_vector, bt)) { |
286 if (VectorNode::implemented(n->Opcode(), cur_max_vector, bt)) { |
556 Node_List* p = _packset.at(i); |
556 Node_List* p = _packset.at(i); |
557 MemNode* s = p->at(0)->as_Mem(); |
557 MemNode* s = p->at(0)->as_Mem(); |
558 assert(!same_velt_type(s, mem_ref), "sanity"); |
558 assert(!same_velt_type(s, mem_ref), "sanity"); |
559 memops.push(s); |
559 memops.push(s); |
560 } |
560 } |
561 MemNode* best_align_to_mem_ref = find_align_to_ref(memops); |
561 best_align_to_mem_ref = find_align_to_ref(memops); |
562 if (best_align_to_mem_ref == NULL) { |
562 if (best_align_to_mem_ref == NULL) { |
563 NOT_PRODUCT(if (TraceSuperWord) tty->print_cr("SuperWord::find_adjacent_refs(): best_align_to_mem_ref == NULL");) |
563 if (TraceSuperWord) { |
|
564 tty->print_cr("SuperWord::find_adjacent_refs(): best_align_to_mem_ref == NULL"); |
|
565 } |
564 break; |
566 break; |
565 } |
567 } |
566 best_iv_adjustment = get_iv_adjustment(best_align_to_mem_ref); |
568 best_iv_adjustment = get_iv_adjustment(best_align_to_mem_ref); |
567 NOT_PRODUCT(find_adjacent_refs_trace_1(best_align_to_mem_ref, best_iv_adjustment);) |
569 NOT_PRODUCT(find_adjacent_refs_trace_1(best_align_to_mem_ref, best_iv_adjustment);) |
568 // Restore list. |
570 // Restore list. |
580 } |
582 } |
581 |
583 |
582 } // while (memops.size() != 0 |
584 } // while (memops.size() != 0 |
583 set_align_to_ref(best_align_to_mem_ref); |
585 set_align_to_ref(best_align_to_mem_ref); |
584 |
586 |
585 #ifndef PRODUCT |
|
586 if (TraceSuperWord) { |
587 if (TraceSuperWord) { |
587 tty->print_cr("\nAfter find_adjacent_refs"); |
588 tty->print_cr("\nAfter find_adjacent_refs"); |
588 print_packset(); |
589 print_packset(); |
589 } |
590 } |
590 #endif |
|
591 } |
591 } |
592 |
592 |
593 #ifndef PRODUCT |
593 #ifndef PRODUCT |
594 void SuperWord::find_adjacent_refs_trace_1(Node* best_align_to_mem_ref, int best_iv_adjustment) { |
594 void SuperWord::find_adjacent_refs_trace_1(Node* best_align_to_mem_ref, int best_iv_adjustment) { |
595 if (is_trace_adjacent()) { |
595 if (is_trace_adjacent()) { |
872 } |
872 } |
873 if (sink_dependent) { |
873 if (sink_dependent) { |
874 _dg.make_edge(s1, slice_sink); |
874 _dg.make_edge(s1, slice_sink); |
875 } |
875 } |
876 } |
876 } |
877 #ifndef PRODUCT |
877 |
878 if (TraceSuperWord) { |
878 if (TraceSuperWord) { |
879 tty->print_cr("\nDependence graph for slice: %d", n->_idx); |
879 tty->print_cr("\nDependence graph for slice: %d", n->_idx); |
880 for (int q = 0; q < _nlist.length(); q++) { |
880 for (int q = 0; q < _nlist.length(); q++) { |
881 _dg.print(_nlist.at(q)); |
881 _dg.print(_nlist.at(q)); |
882 } |
882 } |
883 tty->cr(); |
883 tty->cr(); |
884 } |
884 } |
885 #endif |
885 |
886 _nlist.clear(); |
886 _nlist.clear(); |
887 } |
887 } |
888 |
888 |
889 #ifndef PRODUCT |
|
890 if (TraceSuperWord) { |
889 if (TraceSuperWord) { |
891 tty->print_cr("\ndisjoint_ptrs: %s", _disjoint_ptrs.length() > 0 ? "" : "NONE"); |
890 tty->print_cr("\ndisjoint_ptrs: %s", _disjoint_ptrs.length() > 0 ? "" : "NONE"); |
892 for (int r = 0; r < _disjoint_ptrs.length(); r++) { |
891 for (int r = 0; r < _disjoint_ptrs.length(); r++) { |
893 _disjoint_ptrs.at(r).print(); |
892 _disjoint_ptrs.at(r).print(); |
894 tty->cr(); |
893 tty->cr(); |
895 } |
894 } |
896 tty->cr(); |
895 tty->cr(); |
897 } |
896 } |
898 #endif |
897 |
899 } |
898 } |
900 |
899 |
901 //---------------------------mem_slice_preds--------------------------- |
900 //---------------------------mem_slice_preds--------------------------- |
902 // Return a memory slice (node list) in predecessor order starting at "start" |
901 // Return a memory slice (node list) in predecessor order starting at "start" |
903 void SuperWord::mem_slice_preds(Node* start, Node* stop, GrowableArray<Node*> &preds) { |
902 void SuperWord::mem_slice_preds(Node* start, Node* stop, GrowableArray<Node*> &preds) { |
910 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { |
909 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { |
911 Node* out = n->fast_out(i); |
910 Node* out = n->fast_out(i); |
912 if (out->is_Load()) { |
911 if (out->is_Load()) { |
913 if (in_bb(out)) { |
912 if (in_bb(out)) { |
914 preds.push(out); |
913 preds.push(out); |
915 NOT_PRODUCT(if (TraceSuperWord && Verbose) tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", out->_idx);) |
914 if (TraceSuperWord && Verbose) { |
|
915 tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", out->_idx); |
|
916 } |
916 } |
917 } |
917 } else { |
918 } else { |
918 // FIXME |
919 // FIXME |
919 if (out->is_MergeMem() && !in_bb(out)) { |
920 if (out->is_MergeMem() && !in_bb(out)) { |
920 // Either unrolling is causing a memory edge not to disappear, |
921 // Either unrolling is causing a memory edge not to disappear, |
929 } |
930 } |
930 }//else |
931 }//else |
931 }//for |
932 }//for |
932 if (n == stop) break; |
933 if (n == stop) break; |
933 preds.push(n); |
934 preds.push(n); |
934 NOT_PRODUCT(if (TraceSuperWord && Verbose) tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", n->_idx);) |
935 if (TraceSuperWord && Verbose) { |
|
936 tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", n->_idx); |
|
937 } |
935 prev = n; |
938 prev = n; |
936 assert(n->is_Mem(), "unexpected node %s", n->Name()); |
939 assert(n->is_Mem(), "unexpected node %s", n->Name()); |
937 n = n->in(MemNode::Memory); |
940 n = n->in(MemNode::Memory); |
938 } |
941 } |
939 } |
942 } |
1121 Node_List* p = _packset.at(i); |
1124 Node_List* p = _packset.at(i); |
1122 order_def_uses(p); |
1125 order_def_uses(p); |
1123 } |
1126 } |
1124 } |
1127 } |
1125 |
1128 |
1126 #ifndef PRODUCT |
|
1127 if (TraceSuperWord) { |
1129 if (TraceSuperWord) { |
1128 tty->print_cr("\nAfter extend_packlist"); |
1130 tty->print_cr("\nAfter extend_packlist"); |
1129 print_packset(); |
1131 print_packset(); |
1130 } |
1132 } |
1131 #endif |
|
1132 } |
1133 } |
1133 |
1134 |
1134 //------------------------------follow_use_defs--------------------------- |
1135 //------------------------------follow_use_defs--------------------------- |
1135 // Extend the packset by visiting operand definitions of nodes in pack p |
1136 // Extend the packset by visiting operand definitions of nodes in pack p |
1136 bool SuperWord::follow_use_defs(Node_List* p) { |
1137 bool SuperWord::follow_use_defs(Node_List* p) { |
1410 if (p1 == NULL) { |
1411 if (p1 == NULL) { |
1411 _packset.remove_at(i); |
1412 _packset.remove_at(i); |
1412 } |
1413 } |
1413 } |
1414 } |
1414 |
1415 |
1415 #ifndef PRODUCT |
|
1416 if (TraceSuperWord) { |
1416 if (TraceSuperWord) { |
1417 tty->print_cr("\nAfter combine_packs"); |
1417 tty->print_cr("\nAfter combine_packs"); |
1418 print_packset(); |
1418 print_packset(); |
1419 } |
1419 } |
1420 #endif |
|
1421 } |
1420 } |
1422 |
1421 |
1423 //-----------------------------construct_my_pack_map-------------------------- |
1422 //-----------------------------construct_my_pack_map-------------------------- |
1424 // Construct the map from nodes to packs. Only valid after the |
1423 // Construct the map from nodes to packs. Only valid after the |
1425 // point where a node is only in one pack (after combine_packs). |
1424 // point where a node is only in one pack (after combine_packs). |
2242 |
2241 |
2243 if (SuperWordLoopUnrollAnalysis) { |
2242 if (SuperWordLoopUnrollAnalysis) { |
2244 if (cl->has_passed_slp()) { |
2243 if (cl->has_passed_slp()) { |
2245 uint slp_max_unroll_factor = cl->slp_max_unroll(); |
2244 uint slp_max_unroll_factor = cl->slp_max_unroll(); |
2246 if (slp_max_unroll_factor == max_vlen) { |
2245 if (slp_max_unroll_factor == max_vlen) { |
2247 NOT_PRODUCT(if (TraceSuperWordLoopUnrollAnalysis) tty->print_cr("vector loop(unroll=%d, len=%d)\n", max_vlen, max_vlen_in_bytes*BitsPerByte)); |
2246 if (TraceSuperWordLoopUnrollAnalysis) { |
|
2247 tty->print_cr("vector loop(unroll=%d, len=%d)\n", max_vlen, max_vlen_in_bytes*BitsPerByte); |
|
2248 } |
2248 // For atomic unrolled loops which are vector mapped, instigate more unrolling. |
2249 // For atomic unrolled loops which are vector mapped, instigate more unrolling. |
2249 cl->set_notpassed_slp(); |
2250 cl->set_notpassed_slp(); |
2250 C->set_major_progress(); |
2251 // if vector resources are limited, do not allow additional unrolling |
|
2252 if (FLOATPRESSURE > 8) { |
|
2253 C->set_major_progress(); |
|
2254 } |
2251 cl->mark_do_unroll_only(); |
2255 cl->mark_do_unroll_only(); |
2252 } |
2256 } |
2253 } |
2257 } |
2254 } |
2258 } |
2255 |
2259 |
2648 } |
2652 } |
2649 } |
2653 } |
2650 } |
2654 } |
2651 ct++; |
2655 ct++; |
2652 } while (again); |
2656 } while (again); |
2653 #ifndef PRODUCT |
2657 |
2654 if (TraceSuperWord && Verbose) |
2658 if (TraceSuperWord && Verbose) { |
2655 tty->print_cr("compute_max_depth iterated: %d times", ct); |
2659 tty->print_cr("compute_max_depth iterated: %d times", ct); |
2656 #endif |
2660 } |
2657 } |
2661 } |
2658 |
2662 |
2659 //-------------------------compute_vector_element_type----------------------- |
2663 //-------------------------compute_vector_element_type----------------------- |
2660 // Compute necessary vector element type for expressions |
2664 // Compute necessary vector element type for expressions |
2661 // This propagates backwards a narrower integer type when the |
2665 // This propagates backwards a narrower integer type when the |
2662 // upper bits of the value are not needed. |
2666 // upper bits of the value are not needed. |
2663 // Example: char a,b,c; a = b + c; |
2667 // Example: char a,b,c; a = b + c; |
2664 // Normally the type of the add is integer, but for packed character |
2668 // Normally the type of the add is integer, but for packed character |
2665 // operations the type of the add needs to be char. |
2669 // operations the type of the add needs to be char. |
2666 void SuperWord::compute_vector_element_type() { |
2670 void SuperWord::compute_vector_element_type() { |
2667 #ifndef PRODUCT |
2671 if (TraceSuperWord && Verbose) { |
2668 if (TraceSuperWord && Verbose) |
|
2669 tty->print_cr("\ncompute_velt_type:"); |
2672 tty->print_cr("\ncompute_velt_type:"); |
2670 #endif |
2673 } |
2671 |
2674 |
2672 // Initial type |
2675 // Initial type |
2673 for (int i = 0; i < _block.length(); i++) { |
2676 for (int i = 0; i < _block.length(); i++) { |
2674 Node* n = _block.at(i); |
2677 Node* n = _block.at(i); |
2675 set_velt_type(n, container_type(n)); |
2678 set_velt_type(n, container_type(n)); |
2756 } |
2759 } |
2757 int offset = p.offset_in_bytes(); |
2760 int offset = p.offset_in_bytes(); |
2758 offset += iv_adjust*p.memory_size(); |
2761 offset += iv_adjust*p.memory_size(); |
2759 int off_rem = offset % vw; |
2762 int off_rem = offset % vw; |
2760 int off_mod = off_rem >= 0 ? off_rem : off_rem + vw; |
2763 int off_mod = off_rem >= 0 ? off_rem : off_rem + vw; |
2761 NOT_PRODUCT(if(TraceSuperWord && Verbose) tty->print_cr("SWPointer::memory_alignment: off_rem = %d, off_mod = %d", off_rem, off_mod);) |
2764 if (TraceSuperWord && Verbose) { |
|
2765 tty->print_cr("SWPointer::memory_alignment: off_rem = %d, off_mod = %d", off_rem, off_mod); |
|
2766 } |
2762 return off_mod; |
2767 return off_mod; |
2763 } |
2768 } |
2764 |
2769 |
2765 //---------------------------container_type--------------------------- |
2770 //---------------------------container_type--------------------------- |
2766 // Smallest type containing range of values |
2771 // Smallest type containing range of values |
4044 } |
4049 } |
4045 }//for (DUIterator_Fast imax, |
4050 }//for (DUIterator_Fast imax, |
4046 }//for (int i... |
4051 }//for (int i... |
4047 |
4052 |
4048 if (_ii_first == -1 || _ii_last == -1) { |
4053 if (_ii_first == -1 || _ii_last == -1) { |
4049 #ifndef PRODUCT |
|
4050 if (TraceSuperWord && Verbose) { |
4054 if (TraceSuperWord && Verbose) { |
4051 tty->print_cr("SuperWord::mark_generations unknown error, something vent wrong"); |
4055 tty->print_cr("SuperWord::mark_generations unknown error, something vent wrong"); |
4052 } |
4056 } |
4053 #endif |
|
4054 return -1; // something vent wrong |
4057 return -1; // something vent wrong |
4055 } |
4058 } |
4056 // collect nodes in the first and last generations |
4059 // collect nodes in the first and last generations |
4057 assert(_iteration_first.length() == 0, "_iteration_first must be empty"); |
4060 assert(_iteration_first.length() == 0, "_iteration_first must be empty"); |
4058 assert(_iteration_last.length() == 0, "_iteration_last must be empty"); |
4061 assert(_iteration_last.length() == 0, "_iteration_last must be empty"); |
4222 #endif |
4222 #endif |
4223 |
4223 |
4224 for (int i = 0; i < _mem_slice_head.length(); i++) { |
4224 for (int i = 0; i < _mem_slice_head.length(); i++) { |
4225 Node* n = _mem_slice_head.at(i); |
4225 Node* n = _mem_slice_head.at(i); |
4226 if ( !in_bb(n) || !n->is_Phi() || n->bottom_type() != Type::MEMORY) { |
4226 if ( !in_bb(n) || !n->is_Phi() || n->bottom_type() != Type::MEMORY) { |
4227 #ifndef PRODUCT |
|
4228 if (TraceSuperWord && Verbose) { |
4227 if (TraceSuperWord && Verbose) { |
4229 tty->print_cr("SuperWord::hoist_loads_in_graph: skipping unexpected node n=%d", n->_idx); |
4228 tty->print_cr("SuperWord::hoist_loads_in_graph: skipping unexpected node n=%d", n->_idx); |
4230 } |
4229 } |
4231 #endif |
|
4232 continue; |
4230 continue; |
4233 } |
4231 } |
4234 |
4232 |
4235 #ifndef PRODUCT |
4233 #ifndef PRODUCT |
4236 if (_vector_loop_debug) { |
4234 if (_vector_loop_debug) { |