8175096: Analyse subword in the loop to set maximum vector size
author vdeshpande
Wed, 19 Jul 2017 08:29:44 -0700
changeset 46692 117b089cb1c3
parent 46691 cecf240c87a0
child 46693 1a9ef5f37bf0
8175096: Analyse subword in the loop to set maximum vector size
Reviewed-by: kvn
hotspot/src/share/vm/opto/c2_globals.hpp
hotspot/src/share/vm/opto/loopTransform.cpp
hotspot/src/share/vm/opto/superword.cpp
--- a/hotspot/src/share/vm/opto/c2_globals.hpp	Wed Jul 19 09:00:13 2017 +0200
+++ b/hotspot/src/share/vm/opto/c2_globals.hpp	Wed Jul 19 08:29:44 2017 -0700
@@ -192,6 +192,9 @@
           "of rounds of unroll,optimize,..")                                \
           range(0, max_jint)                                                \
                                                                             \
+  product(bool, UseSubwordForMaxVector, true,                               \
+          "Use Subword Analysis to set maximum vector size")                \
+                                                                            \
   develop(intx, UnrollLimitForProfileCheck, 1,                              \
           "Don't use profile_trip_cnt() to restrict unrolling until "       \
           "unrolling would push the number of unrolled iterations above "   \
--- a/hotspot/src/share/vm/opto/loopTransform.cpp	Wed Jul 19 09:00:13 2017 +0200
+++ b/hotspot/src/share/vm/opto/loopTransform.cpp	Wed Jul 19 08:29:44 2017 -0700
@@ -784,6 +784,9 @@
   }
 
   int slp_max_unroll_factor = cl->slp_max_unroll();
+  if ((LoopMaxUnroll < slp_max_unroll_factor) && FLAG_IS_DEFAULT(LoopMaxUnroll) && UseSubwordForMaxVector) {
+    LoopMaxUnroll = slp_max_unroll_factor;
+  }
   if (cl->has_passed_slp()) {
     if (slp_max_unroll_factor >= future_unroll_ct) return true;
     // Normal case: loop too big
@@ -792,7 +795,7 @@
 
   // Check for being too big
   if (body_size > (uint)_local_loop_unroll_limit) {
-    if (xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true;
+    if ((UseSubwordForMaxVector || xors_in_loop >= 4) && body_size < (uint)LoopUnrollLimit * 4) return true;
     // Normal case: loop too big
     return false;
   }
--- a/hotspot/src/share/vm/opto/superword.cpp	Wed Jul 19 09:00:13 2017 +0200
+++ b/hotspot/src/share/vm/opto/superword.cpp	Wed Jul 19 08:29:44 2017 -0700
@@ -299,6 +299,7 @@
     // Now we try to find the maximum supported consistent vector which the machine
     // description can use
     bool small_basic_type = false;
+    bool flag_small_bt = false;
     for (uint i = 0; i < lpt()->_body.size(); i++) {
       if (ignored_loop_nodes[i] != -1) continue;
 
@@ -334,7 +335,7 @@
 
       if (is_java_primitive(bt) == false) continue;
 
-      int cur_max_vector = Matcher::max_vector_size(bt);
+         int cur_max_vector = Matcher::max_vector_size(bt);
 
       // If a max vector exists which is not larger than _local_loop_unroll_factor
       // stop looking, we already have the max vector to map to.
@@ -348,10 +349,36 @@
 
       // Map the maximal common vector
       if (VectorNode::implemented(n->Opcode(), cur_max_vector, bt)) {
-        if (cur_max_vector < max_vector) {
+        if (cur_max_vector < max_vector && !flag_small_bt) {
           max_vector = cur_max_vector;
+        } else if (cur_max_vector > max_vector && UseSubwordForMaxVector) {
+          // Analyse subword in the loop to set maximum vector size to take advantage of full vector width for subword types.
+          // Here we analyze if narrowing is likely to happen and if it is we set vector size more aggressively.
+          // We check for possibility of narrowing by looking through chain operations using subword types.
+          if (is_subword_type(bt)) {
+            uint start, end;
+            VectorNode::vector_operands(n, &start, &end);
+
+            for (uint j = start; j < end; j++) {
+              Node* in = n->in(j);
+              // Don't propagate through a memory node
+              if (!in->is_Mem() && in_bb(in) && in->bottom_type()->basic_type() == T_INT) {
+                bool same_type = true;
+                for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) {
+                  Node *use = in->fast_out(k);
+                  if (!in_bb(use) && use->bottom_type()->basic_type() != bt) {
+                    same_type = false;
+                    break;
+                  }
+                }
+                if (same_type) {
+                  max_vector = cur_max_vector;
+                  flag_small_bt = true;
+                }
+              }
+            }
+          }
         }
-
         // We only process post loops on predicated targets where we want to
         // mask map the loop to a single iteration
         if (post_loop_allowed) {
@@ -2368,7 +2395,7 @@
         }
       }
 
-      if (vlen_in_bytes > max_vlen_in_bytes) {
+      if (vlen_in_bytes >= max_vlen_in_bytes && vlen > max_vlen) {
         max_vlen = vlen;
         max_vlen_in_bytes = vlen_in_bytes;
       }