6810845: Performance regression in mpegaudio on x64
authorkvn
Thu, 26 Mar 2009 15:04:55 -0700
changeset 2340 cb47f8209cd8
parent 2339 2c9728eddc81
child 2341 caadacf15616
6810845: Performance regression in mpegaudio on x64 Summary: Used the outer loop frequency in frequencies checks in RA. Reviewed-by: never, twisti
hotspot/src/share/vm/opto/block.hpp
hotspot/src/share/vm/opto/c2_globals.hpp
hotspot/src/share/vm/opto/chaitin.cpp
hotspot/src/share/vm/opto/chaitin.hpp
hotspot/src/share/vm/opto/coalesce.cpp
hotspot/src/share/vm/opto/gcm.cpp
hotspot/src/share/vm/opto/machnode.cpp
--- a/hotspot/src/share/vm/opto/block.hpp	Thu Mar 26 14:39:39 2009 -0700
+++ b/hotspot/src/share/vm/opto/block.hpp	Thu Mar 26 15:04:55 2009 -0700
@@ -371,6 +371,7 @@
   Block *_broot;                // Basic block of root
   uint _rpo_ctr;
   CFGLoop* _root_loop;
+  float _outer_loop_freq;       // Outmost loop frequency
 
   // Per node latency estimation, valid only during GCM
   GrowableArray<uint> _node_latency;
@@ -537,6 +538,7 @@
   void compute_loop_depth(int depth);
   void compute_freq(); // compute frequency with loop assuming head freq 1.0f
   void scale_freq();   // scale frequency by loop trip count (including outer loops)
+  float outer_loop_freq() const; // frequency of outer loop
   bool in_loop_nest(Block* b);
   float trip_count() const { return 1.0f / _exit_prob; }
   virtual bool is_loop()  { return true; }
--- a/hotspot/src/share/vm/opto/c2_globals.hpp	Thu Mar 26 14:39:39 2009 -0700
+++ b/hotspot/src/share/vm/opto/c2_globals.hpp	Thu Mar 26 15:04:55 2009 -0700
@@ -391,7 +391,7 @@
   product(intx, EliminateAllocationArraySizeLimit, 64,                      \
           "Array size (number of elements) limit for scalar replacement")   \
                                                                             \
-  product(bool, UseOptoBiasInlining, true,                                 \
+  product(bool, UseOptoBiasInlining, true,                                  \
           "Generate biased locking code in C2 ideal graph")                 \
                                                                             \
   product(intx, ValueSearchLimit, 1000,                                     \
@@ -410,7 +410,7 @@
           "Miniumum %% of a successor (predecessor) for which block layout "\
           "a will allow a fork (join) in a single chain")                   \
                                                                             \
-  product(bool, BlockLayoutRotateLoops, false,                              \
+  product(bool, BlockLayoutRotateLoops, true,                               \
           "Allow back branches to be fall throughs in the block layour")    \
 
 C2_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_NOTPRODUCT_FLAG)
--- a/hotspot/src/share/vm/opto/chaitin.cpp	Thu Mar 26 14:39:39 2009 -0700
+++ b/hotspot/src/share/vm/opto/chaitin.cpp	Thu Mar 26 15:04:55 2009 -0700
@@ -149,6 +149,9 @@
 #endif
 {
   NOT_PRODUCT( Compile::TracePhase t3("ctorChaitin", &_t_ctorChaitin, TimeCompiler); )
+
+  _high_frequency_lrg = MIN2(float(OPTO_LRG_HIGH_FREQ), _cfg._outer_loop_freq);
+
   uint i,j;
   // Build a list of basic blocks, sorted by frequency
   _blks = NEW_RESOURCE_ARRAY( Block *, _cfg._num_blocks );
--- a/hotspot/src/share/vm/opto/chaitin.hpp	Thu Mar 26 14:39:39 2009 -0700
+++ b/hotspot/src/share/vm/opto/chaitin.hpp	Thu Mar 26 15:04:55 2009 -0700
@@ -338,6 +338,8 @@
 
   Block **_blks;                // Array of blocks sorted by frequency for coalescing
 
+  float _high_frequency_lrg;    // Frequency at which LRG will be spilled for debug info
+
 #ifndef PRODUCT
   bool _trace_spilling;
 #endif
@@ -360,6 +362,8 @@
 
   uint n2lidx( const Node *n ) const { return _names[n->_idx]; }
 
+  float high_frequency_lrg() const { return _high_frequency_lrg; }
+
 #ifndef PRODUCT
   bool trace_spilling() const { return _trace_spilling; }
 #endif
--- a/hotspot/src/share/vm/opto/coalesce.cpp	Thu Mar 26 14:39:39 2009 -0700
+++ b/hotspot/src/share/vm/opto/coalesce.cpp	Thu Mar 26 15:04:55 2009 -0700
@@ -473,7 +473,7 @@
         } // End of is two-adr
 
         // Insert a copy at a debug use for a lrg which has high frequency
-        if( (b->_freq < OPTO_DEBUG_SPLIT_FREQ) && n->is_MachSafePoint() ) {
+        if( b->_freq < OPTO_DEBUG_SPLIT_FREQ || b->is_uncommon(_phc._cfg._bbs) ) {
           // Walk the debug inputs to the node and check for lrg freq
           JVMState* jvms = n->jvms();
           uint debug_start = jvms ? jvms->debug_start() : 999999;
@@ -487,7 +487,7 @@
             LRG &lrg = lrgs(nidx);
 
             // If this lrg has a high frequency use/def
-            if( lrg._maxfreq >= OPTO_LRG_HIGH_FREQ ) {
+            if( lrg._maxfreq >= _phc.high_frequency_lrg() ) {
               // If the live range is also live out of this block (like it
               // would be for a fast/slow idiom), the normal spill mechanism
               // does an excellent job.  If it is not live out of this block
--- a/hotspot/src/share/vm/opto/gcm.cpp	Thu Mar 26 14:39:39 2009 -0700
+++ b/hotspot/src/share/vm/opto/gcm.cpp	Thu Mar 26 15:04:55 2009 -0700
@@ -1374,6 +1374,9 @@
   _root_loop->_freq = 1.0;
   _root_loop->scale_freq();
 
+  // Save outmost loop frequency for LRG frequency threshold
+  _outer_loop_freq = _root_loop->outer_loop_freq();
+
   // force paths ending at uncommon traps to be infrequent
   if (!C->do_freq_based_layout()) {
     Block_List worklist;
@@ -1898,6 +1901,7 @@
 // Do a top down traversal of loop tree (visit outer loops first.)
 void CFGLoop::scale_freq() {
   float loop_freq = _freq * trip_count();
+  _freq = loop_freq;
   for (int i = 0; i < _members.length(); i++) {
     CFGElement* s = _members.at(i);
     float block_freq = s->_freq * loop_freq;
@@ -1912,6 +1916,14 @@
   }
 }
 
+// Frequency of outer loop
+float CFGLoop::outer_loop_freq() const {
+  if (_child != NULL) {
+    return _child->_freq;
+  }
+  return _freq;
+}
+
 #ifndef PRODUCT
 //------------------------------dump_tree--------------------------------------
 void CFGLoop::dump_tree() const {
--- a/hotspot/src/share/vm/opto/machnode.cpp	Thu Mar 26 14:39:39 2009 -0700
+++ b/hotspot/src/share/vm/opto/machnode.cpp	Thu Mar 26 15:04:55 2009 -0700
@@ -340,6 +340,10 @@
   if (base == NodeSentinel)  return TypePtr::BOTTOM;
 
   const Type* t = base->bottom_type();
+  if (UseCompressedOops && Universe::narrow_oop_shift() == 0) {
+    // 32-bit unscaled narrow oop can be the base of any address expression
+    t = t->make_ptr();
+  }
   if (t->isa_intptr_t() && offset != 0 && offset != Type::OffsetBot) {
     // We cannot assert that the offset does not look oop-ish here.
     // Depending on the heap layout the cardmark base could land
@@ -353,6 +357,7 @@
 
   // be conservative if we do not recognize the type
   if (tp == NULL) {
+    assert(false, "this path may produce not optimal code");
     return TypePtr::BOTTOM;
   }
   assert(tp->base() != Type::AnyPtr, "not a bare pointer");