src/hotspot/share/opto/loopnode.hpp
changeset 50623 5209d8a6303e
parent 50561 5756e8eecb17
child 50632 fd430e352427
equal deleted inserted replaced
50622:21b96ce2ed10 50623:5209d8a6303e
    36 class CountedLoopNode;
    36 class CountedLoopNode;
    37 class IdealLoopTree;
    37 class IdealLoopTree;
    38 class LoopNode;
    38 class LoopNode;
    39 class Node;
    39 class Node;
    40 class OuterStripMinedLoopEndNode;
    40 class OuterStripMinedLoopEndNode;
       
    41 class PathFrequency;
    41 class PhaseIdealLoop;
    42 class PhaseIdealLoop;
    42 class CountedLoopReserveKit;
    43 class CountedLoopReserveKit;
    43 class VectorSet;
    44 class VectorSet;
    44 class Invariance;
    45 class Invariance;
    45 struct small_cache;
    46 struct small_cache;
    55 class LoopNode : public RegionNode {
    56 class LoopNode : public RegionNode {
    56   // Size is bigger to hold the flags.  However, the flags do not change
    57   // Size is bigger to hold the flags.  However, the flags do not change
    57   // the semantics so it does not appear in the hash & cmp functions.
    58   // the semantics so it does not appear in the hash & cmp functions.
    58   virtual uint size_of() const { return sizeof(*this); }
    59   virtual uint size_of() const { return sizeof(*this); }
    59 protected:
    60 protected:
    60   short _loop_flags;
    61   uint _loop_flags;
    61   // Names for flag bitfields
    62   // Names for flag bitfields
    62   enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3,
    63   enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3,
    63          MainHasNoPreLoop=4,
    64          MainHasNoPreLoop=4,
    64          HasExactTripCount=8,
    65          HasExactTripCount=8,
    65          InnerLoop=16,
    66          InnerLoop=16,
    71          DoUnrollOnly=1024,
    72          DoUnrollOnly=1024,
    72          VectorizedLoop=2048,
    73          VectorizedLoop=2048,
    73          HasAtomicPostLoop=4096,
    74          HasAtomicPostLoop=4096,
    74          HasRangeChecks=8192,
    75          HasRangeChecks=8192,
    75          IsMultiversioned=16384,
    76          IsMultiversioned=16384,
    76          StripMined=32768};
    77          StripMined=32768,
       
    78          ProfileTripFailed=65536};
    77   char _unswitch_count;
    79   char _unswitch_count;
    78   enum { _unswitch_max=3 };
    80   enum { _unswitch_max=3 };
    79   char _postloop_flags;
    81   char _postloop_flags;
    80   enum { LoopNotRCEChecked = 0, LoopRCEChecked = 1, RCEPostLoop = 2 };
    82   enum { LoopNotRCEChecked = 0, LoopRCEChecked = 1, RCEPostLoop = 2 };
    81 
    83 
       
    84   // Expected trip count from profile data
       
    85   float _profile_trip_cnt;
       
    86 
    82 public:
    87 public:
    83   // Names for edge indices
    88   // Names for edge indices
    84   enum { Self=0, EntryControl, LoopBackControl };
    89   enum { Self=0, EntryControl, LoopBackControl };
    85 
    90 
    86   int is_inner_loop() const { return _loop_flags & InnerLoop; }
    91   bool is_inner_loop() const { return _loop_flags & InnerLoop; }
    87   void set_inner_loop() { _loop_flags |= InnerLoop; }
    92   void set_inner_loop() { _loop_flags |= InnerLoop; }
    88 
    93 
    89   int range_checks_present() const { return _loop_flags & HasRangeChecks; }
    94   bool range_checks_present() const { return _loop_flags & HasRangeChecks; }
    90   int is_multiversioned() const { return _loop_flags & IsMultiversioned; }
    95   bool is_multiversioned() const { return _loop_flags & IsMultiversioned; }
    91   int is_vectorized_loop() const { return _loop_flags & VectorizedLoop; }
    96   bool is_vectorized_loop() const { return _loop_flags & VectorizedLoop; }
    92   int is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
    97   bool is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
    93   void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; }
    98   void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; }
    94   int partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; }
    99   bool partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; }
    95   int is_strip_mined() const { return _loop_flags & StripMined; }
   100   bool is_strip_mined() const { return _loop_flags & StripMined; }
       
   101   bool is_profile_trip_failed() const { return _loop_flags & ProfileTripFailed; }
    96 
   102 
    97   void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }
   103   void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }
    98   void mark_has_reductions() { _loop_flags |= HasReductions; }
   104   void mark_has_reductions() { _loop_flags |= HasReductions; }
    99   void mark_was_slp() { _loop_flags |= WasSlpAnalyzed; }
   105   void mark_was_slp() { _loop_flags |= WasSlpAnalyzed; }
   100   void mark_passed_slp() { _loop_flags |= PassedSlpAnalysis; }
   106   void mark_passed_slp() { _loop_flags |= PassedSlpAnalysis; }
   103   void mark_has_atomic_post_loop() { _loop_flags |= HasAtomicPostLoop; }
   109   void mark_has_atomic_post_loop() { _loop_flags |= HasAtomicPostLoop; }
   104   void mark_has_range_checks() { _loop_flags |=  HasRangeChecks; }
   110   void mark_has_range_checks() { _loop_flags |=  HasRangeChecks; }
   105   void mark_is_multiversioned() { _loop_flags |= IsMultiversioned; }
   111   void mark_is_multiversioned() { _loop_flags |= IsMultiversioned; }
   106   void mark_strip_mined() { _loop_flags |= StripMined; }
   112   void mark_strip_mined() { _loop_flags |= StripMined; }
   107   void clear_strip_mined() { _loop_flags &= ~StripMined; }
   113   void clear_strip_mined() { _loop_flags &= ~StripMined; }
       
   114   void mark_profile_trip_failed() { _loop_flags |= ProfileTripFailed; }
   108 
   115 
   109   int unswitch_max() { return _unswitch_max; }
   116   int unswitch_max() { return _unswitch_max; }
   110   int unswitch_count() { return _unswitch_count; }
   117   int unswitch_count() { return _unswitch_count; }
   111 
   118 
   112   int has_been_range_checked() const { return _postloop_flags & LoopRCEChecked; }
   119   int has_been_range_checked() const { return _postloop_flags & LoopRCEChecked; }
   117   void set_unswitch_count(int val) {
   124   void set_unswitch_count(int val) {
   118     assert (val <= unswitch_max(), "too many unswitches");
   125     assert (val <= unswitch_max(), "too many unswitches");
   119     _unswitch_count = val;
   126     _unswitch_count = val;
   120   }
   127   }
   121 
   128 
   122   LoopNode(Node *entry, Node *backedge) : RegionNode(3), _loop_flags(0), _unswitch_count(0), _postloop_flags(0) {
   129   void set_profile_trip_cnt(float ptc) { _profile_trip_cnt = ptc; }
       
   130   float profile_trip_cnt()             { return _profile_trip_cnt; }
       
   131 
       
   132   LoopNode(Node *entry, Node *backedge)
       
   133     : RegionNode(3), _loop_flags(0), _unswitch_count(0),
       
   134       _postloop_flags(0), _profile_trip_cnt(COUNT_UNKNOWN)  {
   123     init_class_id(Class_Loop);
   135     init_class_id(Class_Loop);
   124     init_req(EntryControl, entry);
   136     init_req(EntryControl, entry);
   125     init_req(LoopBackControl, backedge);
   137     init_req(LoopBackControl, backedge);
   126   }
   138   }
   127 
   139 
   184   node_idx_t _main_idx;
   196   node_idx_t _main_idx;
   185 
   197 
   186   // Known trip count calculated by compute_exact_trip_count()
   198   // Known trip count calculated by compute_exact_trip_count()
   187   uint  _trip_count;
   199   uint  _trip_count;
   188 
   200 
   189   // Expected trip count from profile data
       
   190   float _profile_trip_cnt;
       
   191 
       
   192   // Log2 of original loop bodies in unrolled loop
   201   // Log2 of original loop bodies in unrolled loop
   193   int _unrolled_count_log2;
   202   int _unrolled_count_log2;
   194 
   203 
   195   // Node count prior to last unrolling - used to decide if
   204   // Node count prior to last unrolling - used to decide if
   196   // unroll,optimize,unroll,optimize,... is making progress
   205   // unroll,optimize,unroll,optimize,... is making progress
   201   int _slp_maximum_unroll_factor;
   210   int _slp_maximum_unroll_factor;
   202 
   211 
   203 public:
   212 public:
   204   CountedLoopNode( Node *entry, Node *backedge )
   213   CountedLoopNode( Node *entry, Node *backedge )
   205     : LoopNode(entry, backedge), _main_idx(0), _trip_count(max_juint),
   214     : LoopNode(entry, backedge), _main_idx(0), _trip_count(max_juint),
   206       _profile_trip_cnt(COUNT_UNKNOWN), _unrolled_count_log2(0),
   215       _unrolled_count_log2(0), _node_count_before_unroll(0),
   207       _node_count_before_unroll(0), _slp_maximum_unroll_factor(0) {
   216       _slp_maximum_unroll_factor(0) {
   208     init_class_id(Class_CountedLoop);
   217     init_class_id(Class_CountedLoop);
   209     // Initialize _trip_count to the largest possible value.
   218     // Initialize _trip_count to the largest possible value.
   210     // Will be reset (lower) if the loop's trip count is known.
   219     // Will be reset (lower) if the loop's trip count is known.
   211   }
   220   }
   212 
   221 
   243   // so the following main loop 'knows' that it is striding down cache
   252   // so the following main loop 'knows' that it is striding down cache
   244   // lines.
   253   // lines.
   245 
   254 
   246   // A 'main' loop that is ONLY unrolled or peeled, never RCE'd or
   255   // A 'main' loop that is ONLY unrolled or peeled, never RCE'd or
    247   // Aligned, may be missing its pre-loop.
    256   // Aligned, may be missing its pre-loop.
   248   int is_normal_loop   () const { return (_loop_flags&PreMainPostFlagsMask) == Normal; }
   257   bool is_normal_loop   () const { return (_loop_flags&PreMainPostFlagsMask) == Normal; }
   249   int is_pre_loop      () const { return (_loop_flags&PreMainPostFlagsMask) == Pre;    }
   258   bool is_pre_loop      () const { return (_loop_flags&PreMainPostFlagsMask) == Pre;    }
   250   int is_main_loop     () const { return (_loop_flags&PreMainPostFlagsMask) == Main;   }
   259   bool is_main_loop     () const { return (_loop_flags&PreMainPostFlagsMask) == Main;   }
   251   int is_post_loop     () const { return (_loop_flags&PreMainPostFlagsMask) == Post;   }
   260   bool is_post_loop     () const { return (_loop_flags&PreMainPostFlagsMask) == Post;   }
   252   int is_reduction_loop() const { return (_loop_flags&HasReductions) == HasReductions; }
   261   bool is_reduction_loop() const { return (_loop_flags&HasReductions) == HasReductions; }
   253   int was_slp_analyzed () const { return (_loop_flags&WasSlpAnalyzed) == WasSlpAnalyzed; }
   262   bool was_slp_analyzed () const { return (_loop_flags&WasSlpAnalyzed) == WasSlpAnalyzed; }
   254   int has_passed_slp   () const { return (_loop_flags&PassedSlpAnalysis) == PassedSlpAnalysis; }
   263   bool has_passed_slp   () const { return (_loop_flags&PassedSlpAnalysis) == PassedSlpAnalysis; }
   255   int do_unroll_only      () const { return (_loop_flags&DoUnrollOnly) == DoUnrollOnly; }
   264   bool do_unroll_only      () const { return (_loop_flags&DoUnrollOnly) == DoUnrollOnly; }
   256   int is_main_no_pre_loop() const { return _loop_flags & MainHasNoPreLoop; }
   265   bool is_main_no_pre_loop() const { return _loop_flags & MainHasNoPreLoop; }
   257   int has_atomic_post_loop  () const { return (_loop_flags & HasAtomicPostLoop) == HasAtomicPostLoop; }
   266   bool has_atomic_post_loop  () const { return (_loop_flags & HasAtomicPostLoop) == HasAtomicPostLoop; }
   258   void set_main_no_pre_loop() { _loop_flags |= MainHasNoPreLoop; }
   267   void set_main_no_pre_loop() { _loop_flags |= MainHasNoPreLoop; }
   259 
   268 
   260   int main_idx() const { return _main_idx; }
   269   int main_idx() const { return _main_idx; }
   261 
   270 
   262 
   271 
   277     _loop_flags &= ~HasExactTripCount;
   286     _loop_flags &= ~HasExactTripCount;
   278   }
   287   }
   279   void set_notpassed_slp() {
   288   void set_notpassed_slp() {
   280     _loop_flags &= ~PassedSlpAnalysis;
   289     _loop_flags &= ~PassedSlpAnalysis;
   281   }
   290   }
   282 
       
   283   void set_profile_trip_cnt(float ptc) { _profile_trip_cnt = ptc; }
       
   284   float profile_trip_cnt()             { return _profile_trip_cnt; }
       
   285 
   291 
   286   void double_unrolled_count() { _unrolled_count_log2++; }
   292   void double_unrolled_count() { _unrolled_count_log2++; }
   287   int  unrolled_count()        { return 1 << MIN2(_unrolled_count_log2, BitsPerInt-3); }
   293   int  unrolled_count()        { return 1 << MIN2(_unrolled_count_log2, BitsPerInt-3); }
   288 
   294 
   289   void set_node_count_before_unroll(int ct)  { _node_count_before_unroll = ct; }
   295   void set_node_count_before_unroll(int ct)  { _node_count_before_unroll = ct; }
   299   virtual SafePointNode* outer_safepoint() const;
   305   virtual SafePointNode* outer_safepoint() const;
   300 
   306 
   301   // If this is a main loop in a pre/main/post loop nest, walk over
   307   // If this is a main loop in a pre/main/post loop nest, walk over
   302   // the predicates that were inserted by
   308   // the predicates that were inserted by
   303   // duplicate_predicates()/add_range_check_predicate()
   309   // duplicate_predicates()/add_range_check_predicate()
       
   310   static Node* skip_predicates_from_entry(Node* ctrl);
   304   Node* skip_predicates();
   311   Node* skip_predicates();
   305 
   312 
   306 #ifndef PRODUCT
   313 #ifndef PRODUCT
   307   virtual void dump_spec(outputStream *st) const;
   314   virtual void dump_spec(outputStream *st) const;
   308 #endif
   315 #endif
   586 
   593 
   587   // Compute loop trip count if possible
   594   // Compute loop trip count if possible
   588   void compute_trip_count(PhaseIdealLoop* phase);
   595   void compute_trip_count(PhaseIdealLoop* phase);
   589 
   596 
   590   // Compute loop trip count from profile data
   597   // Compute loop trip count from profile data
       
   598   float compute_profile_trip_cnt_helper(Node* n);
   591   void compute_profile_trip_cnt( PhaseIdealLoop *phase );
   599   void compute_profile_trip_cnt( PhaseIdealLoop *phase );
   592 
   600 
   593   // Reassociate invariant expressions.
   601   // Reassociate invariant expressions.
   594   void reassociate_invariants(PhaseIdealLoop *phase);
   602   void reassociate_invariants(PhaseIdealLoop *phase);
   595   // Reassociate invariant add and subtract expressions.
   603   // Reassociate invariant add and subtract expressions.
   730     }
   738     }
   731     return ctrl;
   739     return ctrl;
   732   }
   740   }
   733 
   741 
   734   Node* cast_incr_before_loop(Node* incr, Node* ctrl, Node* loop);
   742   Node* cast_incr_before_loop(Node* incr, Node* ctrl, Node* loop);
   735   void duplicate_predicates(CountedLoopNode* pre_head, Node *min_taken, Node* castii,
   743   void duplicate_predicates_helper(Node* predicate, Node* castii, IdealLoopTree* outer_loop,
   736                             IdealLoopTree* outer_loop, LoopNode* outer_main_head,
   744                                    LoopNode* outer_main_head, uint dd_main_head);
   737                             uint dd_main_head);
   745   void duplicate_predicates(CountedLoopNode* pre_head, Node* castii, IdealLoopTree* outer_loop,
       
   746                             LoopNode* outer_main_head, uint dd_main_head);
   738 
   747 
   739 public:
   748 public:
   740 
   749 
   741   PhaseIterGVN &igvn() const { return _igvn; }
   750   PhaseIterGVN &igvn() const { return _igvn; }
   742 
   751 
  1071                                          bool clone_limit_check,
  1080                                          bool clone_limit_check,
  1072                                          PhaseIdealLoop* loop_phase,
  1081                                          PhaseIdealLoop* loop_phase,
  1073                                          PhaseIterGVN* igvn);
  1082                                          PhaseIterGVN* igvn);
  1074   Node* clone_loop_predicates(Node* old_entry, Node* new_entry, bool clone_limit_check);
  1083   Node* clone_loop_predicates(Node* old_entry, Node* new_entry, bool clone_limit_check);
  1075 
  1084 
       
  1085   static Node* skip_all_loop_predicates(Node* entry);
  1076   static Node* skip_loop_predicates(Node* entry);
  1086   static Node* skip_loop_predicates(Node* entry);
  1077 
  1087 
  1078   // Find a good location to insert a predicate
  1088   // Find a good location to insert a predicate
  1079   static ProjNode* find_predicate_insertion_point(Node* start_c, Deoptimization::DeoptReason reason);
  1089   static ProjNode* find_predicate_insertion_point(Node* start_c, Deoptimization::DeoptReason reason);
  1080   // Find a predicate
  1090   // Find a predicate
  1085                          Node* init, Node* limit, jint stride,
  1095                          Node* init, Node* limit, jint stride,
  1086                          Node* range, bool upper, bool &overflow);
  1096                          Node* range, bool upper, bool &overflow);
  1087 
  1097 
  1088   // Implementation of the loop predication to promote checks outside the loop
  1098   // Implementation of the loop predication to promote checks outside the loop
  1089   bool loop_predication_impl(IdealLoopTree *loop);
  1099   bool loop_predication_impl(IdealLoopTree *loop);
       
  1100   bool loop_predication_impl_helper(IdealLoopTree *loop, ProjNode* proj, ProjNode *predicate_proj,
       
  1101                                     CountedLoopNode *cl, ConNode* zero, Invariance& invar,
       
  1102                                     Deoptimization::DeoptReason reason);
       
  1103   bool loop_predication_should_follow_branches(IdealLoopTree *loop, ProjNode *predicate_proj, float& loop_trip_cnt);
       
  1104   void loop_predication_follow_branches(Node *c, IdealLoopTree *loop, float loop_trip_cnt,
       
  1105                                         PathFrequency& pf, Node_Stack& stack, VectorSet& seen,
       
  1106                                         Node_List& if_proj_list);
  1090   ProjNode* insert_skeleton_predicate(IfNode* iff, IdealLoopTree *loop,
  1107   ProjNode* insert_skeleton_predicate(IfNode* iff, IdealLoopTree *loop,
  1091                                       ProjNode* proj, ProjNode *predicate_proj,
  1108                                       ProjNode* proj, ProjNode *predicate_proj,
  1092                                       ProjNode* upper_bound_proj,
  1109                                       ProjNode* upper_bound_proj,
  1093                                       int scale, Node* offset,
  1110                                       int scale, Node* offset,
  1094                                       Node* init, Node* limit, jint stride,
  1111                                       Node* init, Node* limit, jint stride,
  1095                                       Node* rng, bool& overflow);
  1112                                       Node* rng, bool& overflow,
       
  1113                                       Deoptimization::DeoptReason reason);
  1096   Node* add_range_check_predicate(IdealLoopTree* loop, CountedLoopNode* cl,
  1114   Node* add_range_check_predicate(IdealLoopTree* loop, CountedLoopNode* cl,
  1097                                   Node* predicate_proj, int scale_con, Node* offset,
  1115                                   Node* predicate_proj, int scale_con, Node* offset,
  1098                                   Node* limit, jint stride_con);
  1116                                   Node* limit, jint stride_con);
  1099 
  1117 
  1100   // Helper function to collect predicate for eliminating the useless ones
  1118   // Helper function to collect predicate for eliminating the useless ones