8190934: Regressions on Haswell Xeon due to JDK-8178811
Reviewed-by: neliasso, kvn
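
JDK-8178811 made the generated code emit vzeroupper in method epilogs and around calls whenever VM_Version::supports_vzeroupper() was true, which showed up as the Haswell Xeon regressions in the bug title. With this change the instruction is generated only when the compilation either uses vectors wider than 16 bytes (max_vector_size() > 16) or inlines a library intrinsic whose stub code may use YMM registers; the latter is tracked by the new Compile::_clear_upper_avx flag set from library_call.cpp. Illustrative sketches (not part of the patch) follow the x86_64.ad and compile.hpp diffs below.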
--- a/src/hotspot/cpu/x86/x86_64.ad Fri Dec 15 10:26:45 2017 -0800
+++ b/src/hotspot/cpu/x86/x86_64.ad Fri Dec 15 10:44:06 2017 -0800
@@ -547,8 +547,12 @@
#define __ _masm.
+static bool generate_vzeroupper(Compile* C) {
+ return (VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx() == true)) ? true: false; // Generate vzeroupper
+}
+
static int clear_avx_size() {
- return (VM_Version::supports_vzeroupper()) ? 3: 0; // vzeroupper
+ return generate_vzeroupper(Compile::current()) ? 3: 0; // vzeroupper
}
// !!!!! Special hack to get all types of calls to specify the byte offset
@@ -931,7 +935,7 @@
void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
Compile* C = ra_->C;
- if (VM_Version::supports_vzeroupper()) {
+ if (generate_vzeroupper(C)) {
st->print("vzeroupper");
st->cr(); st->print("\t");
}
@@ -971,9 +975,11 @@
Compile* C = ra_->C;
MacroAssembler _masm(&cbuf);
- // Clear upper bits of YMM registers when current compiled code uses
- // wide vectors to avoid AVX <-> SSE transition penalty during call.
- __ vzeroupper();
+ if (generate_vzeroupper(C)) {
+ // Clear upper bits of YMM registers when current compiled code uses
+ // wide vectors to avoid AVX <-> SSE transition penalty during call.
+ __ vzeroupper();
+ }
int framesize = C->frame_size_in_bytes();
assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
@@ -2112,11 +2118,13 @@
enc_class clear_avx %{
debug_only(int off0 = cbuf.insts_size());
- // Clear upper bits of YMM registers to avoid AVX <-> SSE transition penalty
- // Clear upper bits of YMM registers when current compiled code uses
- // wide vectors to avoid AVX <-> SSE transition penalty during call.
- MacroAssembler _masm(&cbuf);
- __ vzeroupper();
+ if (generate_vzeroupper(Compile::current())) {
+ // Clear upper bits of YMM registers to avoid AVX <-> SSE transition penalty
+ // Clear upper bits of YMM registers when current compiled code uses
+ // wide vectors to avoid AVX <-> SSE transition penalty during call.
+ MacroAssembler _masm(&cbuf);
+ __ vzeroupper();
+ }
debug_only(int off1 = cbuf.insts_size());
assert(off1 - off0 == clear_avx_size(), "correct size prediction");
%}
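
Note (illustration only, not part of the patch): the comments in the hunks above refer to the AVX<->SSE transition penalty that vzeroupper avoids. A rough user-level analogue, assuming an AVX-capable CPU and a compiler invoked with something like -mavx: the explicit _mm256_zeroupper() below plays the same role as the vzeroupper now emitted by MachEpilogNode and enc_class clear_avx only when generate_vzeroupper() is true. In a real mixed binary the SSE callee would be legacy, non-VEX-encoded code (e.g. from a prebuilt library); here sum_sse merely stands in for such a callee.

    #include <immintrin.h>

    // Stand-in for a legacy (non-VEX) SSE routine; touches only XMM registers.
    static double sum_sse(const double* p, int n) {
      __m128d acc = _mm_setzero_pd();
      for (int i = 0; i + 1 < n; i += 2) {
        acc = _mm_add_pd(acc, _mm_loadu_pd(p + i));
      }
      double lanes[2];
      _mm_storeu_pd(lanes, acc);
      return lanes[0] + lanes[1];
    }

    double avx_then_sse(const double* a, const double* b, double* c, int n) {
      int i = 0;
      for (; i + 3 < n; i += 4) {       // 256-bit AVX work dirties the upper YMM bits
        _mm256_storeu_pd(c + i, _mm256_add_pd(_mm256_loadu_pd(a + i),
                                              _mm256_loadu_pd(b + i)));
      }
      _mm256_zeroupper();               // analogue of the generated vzeroupper...
      return sum_sse(c, i);             // ...so the SSE callee pays no transition penalty
    }
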
--- a/src/hotspot/share/opto/compile.cpp Fri Dec 15 10:26:45 2017 -0800
+++ b/src/hotspot/share/opto/compile.cpp Fri Dec 15 10:44:06 2017 -0800
@@ -1094,6 +1094,7 @@
_major_progress = true; // start out assuming good things will happen
set_has_unsafe_access(false);
set_max_vector_size(0);
+ set_clear_upper_avx(false); //false as default for clear upper bits of ymm registers
Copy::zero_to_bytes(_trap_hist, sizeof(_trap_hist));
set_decompile_count(0);
--- a/src/hotspot/share/opto/compile.hpp Fri Dec 15 10:26:45 2017 -0800
+++ b/src/hotspot/share/opto/compile.hpp Fri Dec 15 10:44:06 2017 -0800
@@ -379,6 +379,7 @@
bool _has_boxed_value; // True if a boxed object is allocated
bool _has_reserved_stack_access; // True if the method or an inlined method is annotated with ReservedStackAccess
uint _max_vector_size; // Maximum size of generated vectors
+ bool _clear_upper_avx; // Clear upper bits of ymm registers using vzeroupper
uint _trap_hist[trapHistLength]; // Cumulative traps
bool _trap_can_recompile; // Have we emitted a recompiling trap?
uint _decompile_count; // Cumulative decompilation counts.
@@ -656,8 +657,10 @@
void set_has_boxed_value(bool z) { _has_boxed_value = z; }
bool has_reserved_stack_access() const { return _has_reserved_stack_access; }
void set_has_reserved_stack_access(bool z) { _has_reserved_stack_access = z; }
- uint max_vector_size() const { return _max_vector_size; }
+ uint max_vector_size() const { return _max_vector_size; }
void set_max_vector_size(uint s) { _max_vector_size = s; }
+ bool clear_upper_avx() const { return _clear_upper_avx; }
+ void set_clear_upper_avx(bool s) { _clear_upper_avx = s; }
void set_trap_count(uint r, uint c) { assert(r < trapHistLength, "oob"); _trap_hist[r] = c; }
uint trap_count(uint r) const { assert(r < trapHistLength, "oob"); return _trap_hist[r]; }
bool trap_can_recompile() const { return _trap_can_recompile; }
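
Note (sketch of the intended flag flow, not part of the patch): the new accessor pair is written on the parser side and read back when code is emitted, roughly:

    // library_call.cpp: an intrinsic whose x86 stub may use YMM registers
    C->set_clear_upper_avx(true);                   // only when UseAVX >= 2

    // x86_64.ad: epilog, call sites and the clear_avx encoding
    if (generate_vzeroupper(Compile::current())) {  // supports_vzeroupper() &&
      __ vzeroupper();                              // (max_vector_size() > 16 ||
    }                                               //  clear_upper_avx())
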
--- a/src/hotspot/share/opto/library_call.cpp Fri Dec 15 10:26:45 2017 -0800
+++ b/src/hotspot/share/opto/library_call.cpp Fri Dec 15 10:44:06 2017 -0800
@@ -328,6 +328,13 @@
bool inline_profileBoolean();
bool inline_isCompileConstant();
+ void clear_upper_avx() {
+#ifdef X86
+ if (UseAVX >= 2) {
+ C->set_clear_upper_avx(true);
+ }
+#endif
+ }
};
//---------------------------make_vm_intrinsic----------------------------
@@ -1082,6 +1089,7 @@
// All these intrinsics have checks.
C->set_has_split_ifs(true); // Has chance for split-if optimization
+ clear_upper_avx();
return _gvn.transform(result);
}
@@ -1156,6 +1164,8 @@
const TypeAryPtr* mtype = (ae == StrIntrinsicNode::UU) ? TypeAryPtr::CHARS : TypeAryPtr::BYTES;
set_result(_gvn.transform(new AryEqNode(control(), memory(mtype), arg1, arg2, ae)));
+ clear_upper_avx();
+
return true;
}
@@ -1227,6 +1237,7 @@
result = _gvn.transform(result);
set_result(result);
replace_in_map(index, result);
+ clear_upper_avx();
return true;
}
@@ -1325,6 +1336,7 @@
set_control(_gvn.transform(region));
record_for_igvn(region);
set_result(_gvn.transform(phi));
+ clear_upper_avx();
return true;
}
@@ -1488,6 +1500,8 @@
if (compress) {
set_result(_gvn.transform(count));
}
+ clear_upper_avx();
+
return true;
}
@@ -1585,6 +1599,8 @@
if (!stopped()) {
set_result(newcopy);
}
+ clear_upper_avx();
+
return true;
}
@@ -5286,6 +5302,8 @@
assert(validated, "shouldn't transform if all arguments not validated");
set_all_memory(n);
}
+ clear_upper_avx();
+
return true;
}
@@ -5406,6 +5424,8 @@
Node* res_mem = _gvn.transform(new SCMemProjNode(enc));
set_memory(res_mem, mtype);
set_result(enc);
+ clear_upper_avx();
+
return true;
}
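
The library_call.cpp hunks above insert clear_upper_avx() after the String and array intrinsics visible in the context (array equality via AryEqNode, the string compress/encode and copy paths), since their x86 stubs can use YMM registers when UseAVX >= 2; a compilation that inlines one of them therefore still gets vzeroupper at calls and in the epilog even if its own max_vector_size() never exceeds 16 bytes.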