8190934: Regressions on Haswell Xeon due to JDK-8178811
author vdeshpande
Fri, 15 Dec 2017 10:44:06 -0800
changeset 48371 291020144f22
parent 48370 cfde2a53d393
child 48372 a099e4d4c35b
8190934: Regressions on Haswell Xeon due to JDK-8178811
Reviewed-by: neliasso, kvn
src/hotspot/cpu/x86/x86_64.ad
src/hotspot/share/opto/compile.cpp
src/hotspot/share/opto/compile.hpp
src/hotspot/share/opto/library_call.cpp
--- a/src/hotspot/cpu/x86/x86_64.ad	Fri Dec 15 10:26:45 2017 -0800
+++ b/src/hotspot/cpu/x86/x86_64.ad	Fri Dec 15 10:44:06 2017 -0800
@@ -547,8 +547,12 @@
 
 #define __ _masm.
 
+static bool generate_vzeroupper(Compile* C) {  // True if vzeroupper must be emitted for compilation C
+  return VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx());  // CPU support, and max vector size above 16 or an explicit clear request
+}
+
 static int clear_avx_size() {
-  return (VM_Version::supports_vzeroupper()) ? 3: 0;  // vzeroupper
+  return generate_vzeroupper(Compile::current()) ? 3: 0;  // size of the vzeroupper that clear_avx emits, 0 when it emits none
 }
 
 // !!!!! Special hack to get all types of calls to specify the byte offset
@@ -931,7 +935,7 @@
 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 {
   Compile* C = ra_->C;
-  if (VM_Version::supports_vzeroupper()) {
+  if (generate_vzeroupper(C)) {
     st->print("vzeroupper");
     st->cr(); st->print("\t");
   }
@@ -971,9 +975,11 @@
   Compile* C = ra_->C;
   MacroAssembler _masm(&cbuf);
 
-  // Clear upper bits of YMM registers when current compiled code uses
-  // wide vectors to avoid AVX <-> SSE transition penalty during call.
-  __ vzeroupper();
+  if (generate_vzeroupper(C)) {
+    // Clear upper bits of YMM registers when current compiled code uses
+    // wide vectors to avoid AVX <-> SSE transition penalty during call.
+    __ vzeroupper();
+  }
 
   int framesize = C->frame_size_in_bytes();
   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
@@ -2112,11 +2118,13 @@
 
   enc_class clear_avx %{
     debug_only(int off0 = cbuf.insts_size());
-    // Clear upper bits of YMM registers to avoid AVX <-> SSE transition penalty
-    // Clear upper bits of YMM registers when current compiled code uses
-    // wide vectors to avoid AVX <-> SSE transition penalty during call.
-    MacroAssembler _masm(&cbuf);
-    __ vzeroupper();
+    if (generate_vzeroupper(Compile::current())) {
+      // Emitted under the same condition clear_avx_size() tests, so the size assert below holds.
+      // Clear upper bits of YMM registers when current compiled code uses
+      // wide vectors to avoid AVX <-> SSE transition penalty during call.
+      MacroAssembler _masm(&cbuf);
+      __ vzeroupper();
+    }
     debug_only(int off1 = cbuf.insts_size());
     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
   %}
--- a/src/hotspot/share/opto/compile.cpp	Fri Dec 15 10:26:45 2017 -0800
+++ b/src/hotspot/share/opto/compile.cpp	Fri Dec 15 10:44:06 2017 -0800
@@ -1094,6 +1094,7 @@
   _major_progress = true; // start out assuming good things will happen
   set_has_unsafe_access(false);
   set_max_vector_size(0);
+  set_clear_upper_avx(false);  //false as default for clear upper bits of ymm registers
   Copy::zero_to_bytes(_trap_hist, sizeof(_trap_hist));
   set_decompile_count(0);
 
--- a/src/hotspot/share/opto/compile.hpp	Fri Dec 15 10:26:45 2017 -0800
+++ b/src/hotspot/share/opto/compile.hpp	Fri Dec 15 10:44:06 2017 -0800
@@ -379,6 +379,7 @@
   bool                  _has_boxed_value;       // True if a boxed object is allocated
   bool                  _has_reserved_stack_access; // True if the method or an inlined method is annotated with ReservedStackAccess
   uint                  _max_vector_size;       // Maximum size of generated vectors
+  bool                  _clear_upper_avx;       // Clear upper bits of ymm registers using vzeroupper
   uint                  _trap_hist[trapHistLength];  // Cumulative traps
   bool                  _trap_can_recompile;    // Have we emitted a recompiling trap?
   uint                  _decompile_count;       // Cumulative decompilation counts.
@@ -656,8 +657,10 @@
   void          set_has_boxed_value(bool z)     { _has_boxed_value = z; }
   bool              has_reserved_stack_access() const { return _has_reserved_stack_access; }
   void          set_has_reserved_stack_access(bool z) { _has_reserved_stack_access = z; }
-  uint               max_vector_size() const    { return _max_vector_size; }
+  uint              max_vector_size() const     { return _max_vector_size; }
   void          set_max_vector_size(uint s)     { _max_vector_size = s; }
+  bool              clear_upper_avx() const     { return _clear_upper_avx; }
+  void          set_clear_upper_avx(bool s)     { _clear_upper_avx = s; }
   void          set_trap_count(uint r, uint c)  { assert(r < trapHistLength, "oob");        _trap_hist[r] = c; }
   uint              trap_count(uint r) const    { assert(r < trapHistLength, "oob"); return _trap_hist[r]; }
   bool              trap_can_recompile() const  { return _trap_can_recompile; }
--- a/src/hotspot/share/opto/library_call.cpp	Fri Dec 15 10:26:45 2017 -0800
+++ b/src/hotspot/share/opto/library_call.cpp	Fri Dec 15 10:44:06 2017 -0800
@@ -328,6 +328,13 @@
 
   bool inline_profileBoolean();
   bool inline_isCompileConstant();
+  void clear_upper_avx() {  // Ask the compilation to clear upper YMM bits; only acts on X86 builds with UseAVX >= 2
+#ifdef X86
+    if (UseAVX >= 2) {
+      C->set_clear_upper_avx(true);  // read back via Compile::clear_upper_avx()
+    }
+#endif
+  }
 };
 
 //---------------------------make_vm_intrinsic----------------------------
@@ -1082,6 +1089,7 @@
 
   // All these intrinsics have checks.
   C->set_has_split_ifs(true); // Has chance for split-if optimization
+  clear_upper_avx();
 
   return _gvn.transform(result);
 }
@@ -1156,6 +1164,8 @@
 
   const TypeAryPtr* mtype = (ae == StrIntrinsicNode::UU) ? TypeAryPtr::CHARS : TypeAryPtr::BYTES;
   set_result(_gvn.transform(new AryEqNode(control(), memory(mtype), arg1, arg2, ae)));
+  clear_upper_avx();
+
   return true;
 }
 
@@ -1227,6 +1237,7 @@
   result = _gvn.transform(result);
   set_result(result);
   replace_in_map(index, result);
+  clear_upper_avx();
   return true;
 }
 
@@ -1325,6 +1336,7 @@
   set_control(_gvn.transform(region));
   record_for_igvn(region);
   set_result(_gvn.transform(phi));
+  clear_upper_avx();
 
   return true;
 }
@@ -1488,6 +1500,8 @@
   if (compress) {
     set_result(_gvn.transform(count));
   }
+  clear_upper_avx();
+
   return true;
 }
 
@@ -1585,6 +1599,8 @@
   if (!stopped()) {
     set_result(newcopy);
   }
+  clear_upper_avx();
+
   return true;
 }
 
@@ -5286,6 +5302,8 @@
     assert(validated, "shouldn't transform if all arguments not validated");
     set_all_memory(n);
   }
+  clear_upper_avx();
+
 
   return true;
 }
@@ -5406,6 +5424,8 @@
   Node* res_mem = _gvn.transform(new SCMemProjNode(enc));
   set_memory(res_mem, mtype);
   set_result(enc);
+  clear_upper_avx();
+
   return true;
 }