55 class LoopNode : public RegionNode { |
56 class LoopNode : public RegionNode { |
56 // Size is bigger to hold the flags. However, the flags do not change |
57 // Size is bigger to hold the flags. However, the flags do not change |
57 // the semantics so it does not appear in the hash & cmp functions. |
58 // the semantics so it does not appear in the hash & cmp functions. |
58 virtual uint size_of() const { return sizeof(*this); } |
59 virtual uint size_of() const { return sizeof(*this); } |
59 protected: |
60 protected: |
60 short _loop_flags; |
61 uint _loop_flags; |
61 // Names for flag bitfields |
62 // Names for flag bitfields |
62 enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3, |
63 enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3, |
63 MainHasNoPreLoop=4, |
64 MainHasNoPreLoop=4, |
64 HasExactTripCount=8, |
65 HasExactTripCount=8, |
65 InnerLoop=16, |
66 InnerLoop=16, |
71 DoUnrollOnly=1024, |
72 DoUnrollOnly=1024, |
72 VectorizedLoop=2048, |
73 VectorizedLoop=2048, |
73 HasAtomicPostLoop=4096, |
74 HasAtomicPostLoop=4096, |
74 HasRangeChecks=8192, |
75 HasRangeChecks=8192, |
75 IsMultiversioned=16384, |
76 IsMultiversioned=16384, |
76 StripMined=32768}; |
77 StripMined=32768, |
|
78 ProfileTripFailed=65536}; |
77 char _unswitch_count; |
79 char _unswitch_count; |
78 enum { _unswitch_max=3 }; |
80 enum { _unswitch_max=3 }; |
79 char _postloop_flags; |
81 char _postloop_flags; |
80 enum { LoopNotRCEChecked = 0, LoopRCEChecked = 1, RCEPostLoop = 2 }; |
82 enum { LoopNotRCEChecked = 0, LoopRCEChecked = 1, RCEPostLoop = 2 }; |
81 |
83 |
|
84 // Expected trip count from profile data |
|
85 float _profile_trip_cnt; |
|
86 |
82 public: |
87 public: |
83 // Names for edge indices |
88 // Names for edge indices |
84 enum { Self=0, EntryControl, LoopBackControl }; |
89 enum { Self=0, EntryControl, LoopBackControl }; |
85 |
90 |
// Reports whether the InnerLoop bit is set in _loop_flags (the int form
// returns the masked value itself, the bool form its truth value).
86 int is_inner_loop() const { return _loop_flags & InnerLoop; } |
91 bool is_inner_loop() const { return _loop_flags & InnerLoop; } |
// Sets the InnerLoop bit in _loop_flags; other bits are left untouched.
87 void set_inner_loop() { _loop_flags |= InnerLoop; } |
92 void set_inner_loop() { _loop_flags |= InnerLoop; } |
88 |
93 |
// Flag queries: each masks _loop_flags with the named enumerator, so a
// nonzero (int form) or true (bool form) result means that bit is set.
// PartialPeelLoop and PartialPeelFailed are declared outside the visible
// part of the flag enum.
89 int range_checks_present() const { return _loop_flags & HasRangeChecks; } |
94 bool range_checks_present() const { return _loop_flags & HasRangeChecks; } |
90 int is_multiversioned() const { return _loop_flags & IsMultiversioned; } |
95 bool is_multiversioned() const { return _loop_flags & IsMultiversioned; } |
91 int is_vectorized_loop() const { return _loop_flags & VectorizedLoop; } |
96 bool is_vectorized_loop() const { return _loop_flags & VectorizedLoop; } |
92 int is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; } |
97 bool is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; } |
// The only mutator in this group: sets the PartialPeelLoop bit.
93 void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; } |
98 void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; } |
94 int partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; } |
99 bool partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; } |
95 int is_strip_mined() const { return _loop_flags & StripMined; } |
100 bool is_strip_mined() const { return _loop_flags & StripMined; } |

// Tests the ProfileTripFailed bit (65536); this accessor appears on one side only.
101 bool is_profile_trip_failed() const { return _loop_flags & ProfileTripFailed; } |
96 |
102 |
// Flag mutators: every mark_* function ORs the named enumerator into
// _loop_flags and leaves the other bits unchanged.
97 void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; } |
103 void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; } |
98 void mark_has_reductions() { _loop_flags |= HasReductions; } |
104 void mark_has_reductions() { _loop_flags |= HasReductions; } |
99 void mark_was_slp() { _loop_flags |= WasSlpAnalyzed; } |
105 void mark_was_slp() { _loop_flags |= WasSlpAnalyzed; } |
100 void mark_passed_slp() { _loop_flags |= PassedSlpAnalysis; } |
106 void mark_passed_slp() { _loop_flags |= PassedSlpAnalysis; } |
103 void mark_has_atomic_post_loop() { _loop_flags |= HasAtomicPostLoop; } |
109 void mark_has_atomic_post_loop() { _loop_flags |= HasAtomicPostLoop; } |
104 void mark_has_range_checks() { _loop_flags |= HasRangeChecks; } |
110 void mark_has_range_checks() { _loop_flags |= HasRangeChecks; } |
105 void mark_is_multiversioned() { _loop_flags |= IsMultiversioned; } |
111 void mark_is_multiversioned() { _loop_flags |= IsMultiversioned; } |
106 void mark_strip_mined() { _loop_flags |= StripMined; } |
112 void mark_strip_mined() { _loop_flags |= StripMined; } |
// The one clearing operation here: ANDs with the complement of StripMined.
107 void clear_strip_mined() { _loop_flags &= ~StripMined; } |
113 void clear_strip_mined() { _loop_flags &= ~StripMined; } |

// Sets the ProfileTripFailed bit; no matching clear function is visible here.
114 void mark_profile_trip_failed() { _loop_flags |= ProfileTripFailed; } |
108 |
115 |
// Returns the enum constant _unswitch_max (declared as 3), the upper bound
// that set_unswitch_count() asserts against.
109 int unswitch_max() { return _unswitch_max; } |
116 int unswitch_max() { return _unswitch_max; } |
// Returns the char member _unswitch_count widened to int.
110 int unswitch_count() { return _unswitch_count; } |
117 int unswitch_count() { return _unswitch_count; } |
111 |
118 |
// Returns nonzero when the LoopRCEChecked bit (value 1) is set in
// _postloop_flags; this reads the post-loop flag byte, not _loop_flags.
// NOTE(review): both copies return int, whereas the _loop_flags queries have
// a bool-returning form — confirm whether this one was left as int on purpose.
112 int has_been_range_checked() const { return _postloop_flags & LoopRCEChecked; } |
119 int has_been_range_checked() const { return _postloop_flags & LoopRCEChecked; } |
// Stores val into _unswitch_count after asserting val <= unswitch_max().
// Only the upper bound is checked; the int argument is narrowed to the
// char-typed member on assignment.
117 void set_unswitch_count(int val) { |
124 void set_unswitch_count(int val) { |
118 assert (val <= unswitch_max(), "too many unswitches"); |
125 assert (val <= unswitch_max(), "too many unswitches"); |
119 _unswitch_count = val; |
126 _unswitch_count = val; |
120 } |
127 } |
121 |
128 |
// LoopNode constructor: passes 3 to the RegionNode base, zero-initializes
// _loop_flags, _unswitch_count and _postloop_flags, tags the node with
// Class_Loop, and installs entry/backedge at the EntryControl and
// LoopBackControl input slots. The multi-line copy of the initializer list
// additionally sets _profile_trip_cnt to COUNT_UNKNOWN.
122 LoopNode(Node *entry, Node *backedge) : RegionNode(3), _loop_flags(0), _unswitch_count(0), _postloop_flags(0) { |
// Plain setter and getter for the _profile_trip_cnt member.
129 void set_profile_trip_cnt(float ptc) { _profile_trip_cnt = ptc; } |

130 float profile_trip_cnt() { return _profile_trip_cnt; } |

131 |

132 LoopNode(Node *entry, Node *backedge) |

133 : RegionNode(3), _loop_flags(0), _unswitch_count(0), |

134 _postloop_flags(0), _profile_trip_cnt(COUNT_UNKNOWN) { |
123 init_class_id(Class_Loop); |
135 init_class_id(Class_Loop); |
124 init_req(EntryControl, entry); |
136 init_req(EntryControl, entry); |
125 init_req(LoopBackControl, backedge); |
137 init_req(LoopBackControl, backedge); |
126 } |
138 } |
127 |
139 |
184 node_idx_t _main_idx; |
196 node_idx_t _main_idx; |
185 |
197 |
186 // Known trip count calculated by compute_exact_trip_count() |
198 // Known trip count calculated by compute_exact_trip_count() |
187 uint _trip_count; |
199 uint _trip_count; |
188 |
200 |
189 // Expected trip count from profile data |
|
190 float _profile_trip_cnt; |
|
191 |
|
192 // Log2 of original loop bodies in unrolled loop |
201 // Log2 of original loop bodies in unrolled loop |
193 int _unrolled_count_log2; |
202 int _unrolled_count_log2; |
194 |
203 |
195 // Node count prior to last unrolling - used to decide if |
204 // Node count prior to last unrolling - used to decide if |
196 // unroll,optimize,unroll,optimize,... is making progress |
205 // unroll,optimize,unroll,optimize,... is making progress |
201 int _slp_maximum_unroll_factor; |
210 int _slp_maximum_unroll_factor; |
202 |
211 |
203 public: |
212 public: |
// CountedLoopNode constructor: forwards entry/backedge to LoopNode, sets
// _main_idx to 0 and _trip_count to max_juint, zeroes _unrolled_count_log2,
// _node_count_before_unroll and _slp_maximum_unroll_factor, then tags the
// node with Class_CountedLoop. One copy of the initializer list also sets
// _profile_trip_cnt to COUNT_UNKNOWN; the other copy does not mention it.
204 CountedLoopNode( Node *entry, Node *backedge ) |
213 CountedLoopNode( Node *entry, Node *backedge ) |
205 : LoopNode(entry, backedge), _main_idx(0), _trip_count(max_juint), |
214 : LoopNode(entry, backedge), _main_idx(0), _trip_count(max_juint), |
206 _profile_trip_cnt(COUNT_UNKNOWN), _unrolled_count_log2(0), |
215 _unrolled_count_log2(0), _node_count_before_unroll(0), |
207 _node_count_before_unroll(0), _slp_maximum_unroll_factor(0) { |
216 _slp_maximum_unroll_factor(0) { |
208 init_class_id(Class_CountedLoop); |
217 init_class_id(Class_CountedLoop); |
209 // Initialize _trip_count to the largest possible value. |
218 // Initialize _trip_count to the largest possible value. |
210 // Will be reset (lower) if the loop's trip count is known. |
219 // Will be reset (lower) if the loop's trip count is known. |
211 } |
220 } |
212 |
221 |
243 // so the following main loop 'knows' that it is striding down cache |
252 // so the following main loop 'knows' that it is striding down cache |
244 // lines. |
253 // lines. |
245 |
254 |
246 // A 'main' loop that is ONLY unrolled or peeled, never RCE'd or |
255 // A 'main' loop that is ONLY unrolled or peeled, never RCE'd or |
247 // Aligned, may be missing its pre-loop. |
256 // Aligned, may be missing its pre-loop. |
// Loop-kind queries: the first four compare the bits selected by
// PreMainPostFlagsMask (3) against Normal/Pre/Main/Post, so exactly one of
// them holds for any flag value.
248 int is_normal_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Normal; } |
257 bool is_normal_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Normal; } |
249 int is_pre_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Pre; } |
258 bool is_pre_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Pre; } |
250 int is_main_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Main; } |
259 bool is_main_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Main; } |
251 int is_post_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Post; } |
260 bool is_post_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Post; } |
// Single-bit tests written as (flags & bit) == bit, which yields 0 or 1.
252 int is_reduction_loop() const { return (_loop_flags&HasReductions) == HasReductions; } |
261 bool is_reduction_loop() const { return (_loop_flags&HasReductions) == HasReductions; } |
253 int was_slp_analyzed () const { return (_loop_flags&WasSlpAnalyzed) == WasSlpAnalyzed; } |
262 bool was_slp_analyzed () const { return (_loop_flags&WasSlpAnalyzed) == WasSlpAnalyzed; } |
254 int has_passed_slp () const { return (_loop_flags&PassedSlpAnalysis) == PassedSlpAnalysis; } |
263 bool has_passed_slp () const { return (_loop_flags&PassedSlpAnalysis) == PassedSlpAnalysis; } |
255 int do_unroll_only () const { return (_loop_flags&DoUnrollOnly) == DoUnrollOnly; } |
264 bool do_unroll_only () const { return (_loop_flags&DoUnrollOnly) == DoUnrollOnly; } |
// Unlike its neighbours, this one returns the raw masked value (4 or 0 in the int form).
256 int is_main_no_pre_loop() const { return _loop_flags & MainHasNoPreLoop; } |
265 bool is_main_no_pre_loop() const { return _loop_flags & MainHasNoPreLoop; } |
257 int has_atomic_post_loop () const { return (_loop_flags & HasAtomicPostLoop) == HasAtomicPostLoop; } |
266 bool has_atomic_post_loop () const { return (_loop_flags & HasAtomicPostLoop) == HasAtomicPostLoop; } |
// Sets the MainHasNoPreLoop bit; does not touch the pre/main/post kind bits.
258 void set_main_no_pre_loop() { _loop_flags |= MainHasNoPreLoop; } |
267 void set_main_no_pre_loop() { _loop_flags |= MainHasNoPreLoop; } |
259 |
268 |
// Returns _main_idx (declared as node_idx_t) converted to int.
260 int main_idx() const { return _main_idx; } |
269 int main_idx() const { return _main_idx; } |
261 |
270 |
262 |
271 |
277 _loop_flags &= ~HasExactTripCount; |
286 _loop_flags &= ~HasExactTripCount; |
278 } |
287 } |
// Clears the PassedSlpAnalysis bit in _loop_flags (the inverse of
// mark_passed_slp()); all other flag bits are preserved.
279 void set_notpassed_slp() { |
288 void set_notpassed_slp() { |
280 _loop_flags &= ~PassedSlpAnalysis; |
289 _loop_flags &= ~PassedSlpAnalysis; |
281 } |
290 } |
282 |
|
// Plain setter and getter for the float member _profile_trip_cnt; no
// validation or conversion is applied. These two records appear on one side only.
283 void set_profile_trip_cnt(float ptc) { _profile_trip_cnt = ptc; } |

284 float profile_trip_cnt() { return _profile_trip_cnt; } |
|
285 |
291 |
// Increments _unrolled_count_log2, i.e. doubles the count that
// unrolled_count() reports.
286 void double_unrolled_count() { _unrolled_count_log2++; } |
292 void double_unrolled_count() { _unrolled_count_log2++; } |
// Returns 2^_unrolled_count_log2, with the shift amount capped at
// BitsPerInt-3 so the left shift stays within int range.
287 int unrolled_count() { return 1 << MIN2(_unrolled_count_log2, BitsPerInt-3); } |
293 int unrolled_count() { return 1 << MIN2(_unrolled_count_log2, BitsPerInt-3); } |
288 |
294 |
// Records ct in _node_count_before_unroll without any range checking.
289 void set_node_count_before_unroll(int ct) { _node_count_before_unroll = ct; } |
295 void set_node_count_before_unroll(int ct) { _node_count_before_unroll = ct; } |
299 virtual SafePointNode* outer_safepoint() const; |
305 virtual SafePointNode* outer_safepoint() const; |
300 |
306 |
301 // If this is a main loop in a pre/main/post loop nest, walk over |
307 // If this is a main loop in a pre/main/post loop nest, walk over |
302 // the predicates that were inserted by |
308 // the predicates that were inserted by |
303 // duplicate_predicates()/add_range_check_predicate() |
309 // duplicate_predicates()/add_range_check_predicate() |
|
310 static Node* skip_predicates_from_entry(Node* ctrl); |
304 Node* skip_predicates(); |
311 Node* skip_predicates(); |
305 |
312 |
306 #ifndef PRODUCT |
313 #ifndef PRODUCT |
307 virtual void dump_spec(outputStream *st) const; |
314 virtual void dump_spec(outputStream *st) const; |
308 #endif |
315 #endif |
586 |
593 |
587 // Compute loop trip count if possible |
594 // Compute loop trip count if possible |
588 void compute_trip_count(PhaseIdealLoop* phase); |
595 void compute_trip_count(PhaseIdealLoop* phase); |
589 |
596 |
590 // Compute loop trip count from profile data |
597 // Compute loop trip count from profile data |
|
598 float compute_profile_trip_cnt_helper(Node* n); |
591 void compute_profile_trip_cnt( PhaseIdealLoop *phase ); |
599 void compute_profile_trip_cnt( PhaseIdealLoop *phase ); |
592 |
600 |
593 // Reassociate invariant expressions. |
601 // Reassociate invariant expressions. |
594 void reassociate_invariants(PhaseIdealLoop *phase); |
602 void reassociate_invariants(PhaseIdealLoop *phase); |
595 // Reassociate invariant add and subtract expressions. |
603 // Reassociate invariant add and subtract expressions. |
730 } |
738 } |
731 return ctrl; |
739 return ctrl; |
732 } |
740 } |
733 |
741 |
734 Node* cast_incr_before_loop(Node* incr, Node* ctrl, Node* loop); |
742 Node* cast_incr_before_loop(Node* incr, Node* ctrl, Node* loop); |
735 void duplicate_predicates(CountedLoopNode* pre_head, Node *min_taken, Node* castii, |
743 void duplicate_predicates_helper(Node* predicate, Node* castii, IdealLoopTree* outer_loop, |
736 IdealLoopTree* outer_loop, LoopNode* outer_main_head, |
744 LoopNode* outer_main_head, uint dd_main_head); |
737 uint dd_main_head); |
745 void duplicate_predicates(CountedLoopNode* pre_head, Node* castii, IdealLoopTree* outer_loop, |
|
746 LoopNode* outer_main_head, uint dd_main_head); |
738 |
747 |
739 public: |
748 public: |
740 |
749 |
741 PhaseIterGVN &igvn() const { return _igvn; } |
750 PhaseIterGVN &igvn() const { return _igvn; } |
742 |
751 |
1071 bool clone_limit_check, |
1080 bool clone_limit_check, |
1072 PhaseIdealLoop* loop_phase, |
1081 PhaseIdealLoop* loop_phase, |
1073 PhaseIterGVN* igvn); |
1082 PhaseIterGVN* igvn); |
1074 Node* clone_loop_predicates(Node* old_entry, Node* new_entry, bool clone_limit_check); |
1083 Node* clone_loop_predicates(Node* old_entry, Node* new_entry, bool clone_limit_check); |
1075 |
1084 |
|
1085 static Node* skip_all_loop_predicates(Node* entry); |
1076 static Node* skip_loop_predicates(Node* entry); |
1086 static Node* skip_loop_predicates(Node* entry); |
1077 |
1087 |
1078 // Find a good location to insert a predicate |
1088 // Find a good location to insert a predicate |
1079 static ProjNode* find_predicate_insertion_point(Node* start_c, Deoptimization::DeoptReason reason); |
1089 static ProjNode* find_predicate_insertion_point(Node* start_c, Deoptimization::DeoptReason reason); |
1080 // Find a predicate |
1090 // Find a predicate |
1085 Node* init, Node* limit, jint stride, |
1095 Node* init, Node* limit, jint stride, |
1086 Node* range, bool upper, bool &overflow); |
1096 Node* range, bool upper, bool &overflow); |
1087 |
1097 |
1088 // Implementation of the loop predication to promote checks outside the loop |
1098 // Implementation of the loop predication to promote checks outside the loop |
1089 bool loop_predication_impl(IdealLoopTree *loop); |
1099 bool loop_predication_impl(IdealLoopTree *loop); |
|
1100 bool loop_predication_impl_helper(IdealLoopTree *loop, ProjNode* proj, ProjNode *predicate_proj, |
|
1101 CountedLoopNode *cl, ConNode* zero, Invariance& invar, |
|
1102 Deoptimization::DeoptReason reason); |
|
1103 bool loop_predication_should_follow_branches(IdealLoopTree *loop, ProjNode *predicate_proj, float& loop_trip_cnt); |
|
1104 void loop_predication_follow_branches(Node *c, IdealLoopTree *loop, float loop_trip_cnt, |
|
1105 PathFrequency& pf, Node_Stack& stack, VectorSet& seen, |
|
1106 Node_List& if_proj_list); |
1090 ProjNode* insert_skeleton_predicate(IfNode* iff, IdealLoopTree *loop, |
1107 ProjNode* insert_skeleton_predicate(IfNode* iff, IdealLoopTree *loop, |
1091 ProjNode* proj, ProjNode *predicate_proj, |
1108 ProjNode* proj, ProjNode *predicate_proj, |
1092 ProjNode* upper_bound_proj, |
1109 ProjNode* upper_bound_proj, |
1093 int scale, Node* offset, |
1110 int scale, Node* offset, |
1094 Node* init, Node* limit, jint stride, |
1111 Node* init, Node* limit, jint stride, |
1095 Node* rng, bool& overflow); |
1112 Node* rng, bool& overflow, |
|
1113 Deoptimization::DeoptReason reason); |
1096 Node* add_range_check_predicate(IdealLoopTree* loop, CountedLoopNode* cl, |
1114 Node* add_range_check_predicate(IdealLoopTree* loop, CountedLoopNode* cl, |
1097 Node* predicate_proj, int scale_con, Node* offset, |
1115 Node* predicate_proj, int scale_con, Node* offset, |
1098 Node* limit, jint stride_con); |
1116 Node* limit, jint stride_con); |
1099 |
1117 |
1100 // Helper function to collect predicate for eliminating the useless ones |
1118 // Helper function to collect predicate for eliminating the useless ones |