8187601: Unrolling more when SLP auto-vectorization failed
author zyao
Wed, 20 Sep 2017 18:30:32 +0800
changeset 47591 d78db2ebce5e
parent 47590 b3fd664e5af2
child 47592 68d46cb9be45
8187601: Unrolling more when SLP auto-vectorization failed
Reviewed-by: kvn
src/hotspot/share/opto/loopTransform.cpp
src/hotspot/share/opto/superword.cpp
--- a/src/hotspot/share/opto/loopTransform.cpp	Wed Sep 27 16:17:47 2017 +0200
+++ b/src/hotspot/share/opto/loopTransform.cpp	Wed Sep 20 18:30:32 2017 +0800
@@ -666,7 +666,7 @@
   _local_loop_unroll_limit = LoopUnrollLimit;
   _local_loop_unroll_factor = 4;
   int future_unroll_ct = cl->unrolled_count() * 2;
-  if (!cl->do_unroll_only()) {
+  if (!cl->is_vectorized_loop()) {
     if (future_unroll_ct > LoopMaxUnroll) return false;
   } else {
     // obey user constraints on vector mapped loops with additional unrolling applied
--- a/src/hotspot/share/opto/superword.cpp	Wed Sep 27 16:17:47 2017 +0200
+++ b/src/hotspot/share/opto/superword.cpp	Wed Sep 20 18:30:32 2017 +0800
@@ -145,6 +145,8 @@
   // Skip any loops already optimized by slp
   if (cl->is_vectorized_loop()) return;
 
+  if (cl->do_unroll_only()) return;
+
   if (cl->is_main_loop()) {
     // Check for pre-loop ending with CountedLoopEnd(Bool(Cmp(x,Opaque1(limit))))
     CountedLoopEndNode* pre_end = get_pre_loop_end(cl);
@@ -2163,7 +2165,15 @@
 //------------------------------output---------------------------
 // Convert packs into vector node operations
 void SuperWord::output() {
-  if (_packset.length() == 0) return;
+  CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
+  Compile* C = _phase->C;
+  if (_packset.length() == 0) {
+    // Instigate more unrolling for optimization when vectorization fails.
+    C->set_major_progress();
+    cl->set_notpassed_slp();
+    cl->mark_do_unroll_only();
+    return;
+  }
 
 #ifndef PRODUCT
   if (TraceLoopOpts) {
@@ -2172,7 +2182,6 @@
   }
 #endif
 
-  CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
   if (cl->is_main_loop()) {
     // MUST ENSURE main loop's initial value is properly aligned:
     //  (iv_initial_value + min_iv_offset) % vector_width_in_bytes() == 0
@@ -2185,7 +2194,6 @@
     }
   }
 
-  Compile* C = _phase->C;
   uint max_vlen_in_bytes = 0;
   uint max_vlen = 0;
   bool can_process_post_loop = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop());
@@ -4493,4 +4501,3 @@
 
   return true;
 }
-