--- a/hotspot/src/cpu/x86/vm/x86_64.ad Tue Apr 02 09:30:07 2013 +0200
+++ b/hotspot/src/cpu/x86/vm/x86_64.ad Wed Apr 03 11:12:57 2013 -0700
@@ -399,6 +399,9 @@
static int preserve_SP_size() {
return 3; // rex.w, op, rm(reg/reg)
}
+static int clear_avx_size() {
+ return (Compile::current()->max_vector_size() > 16) ? 3 : 0; // byte size of vzeroupper; 0 when max_vector_size() <= 16 and it is not emitted
+}
// !!!!! Special hack to get all types of calls to specify the byte offset
// from the start of the call to the point where the return address
@@ -406,6 +409,7 @@
int MachCallStaticJavaNode::ret_addr_offset()
{
int offset = 5; // 5 bytes from start of call to where return address points
+ offset += clear_avx_size();
if (_method_handle_invoke)
offset += preserve_SP_size();
return offset;
@@ -413,11 +417,16 @@
int MachCallDynamicJavaNode::ret_addr_offset()
{
- return 15; // 15 bytes from start of call to where return address points
+ int offset = 15; // 15 bytes from start of call to where return address points
+ offset += clear_avx_size(); // plus the vzeroupper that clear_avx encodes ahead of the call, if any
+ return offset;
}
-// In os_cpu .ad file
-// int MachCallRuntimeNode::ret_addr_offset()
+int MachCallRuntimeNode::ret_addr_offset() {
+ int offset = 13; // 13 bytes: movq r10,#addr; callq (r10), as emitted by Java_To_Runtime
+ offset += clear_avx_size(); // NOTE(review): the call_leaf_nofp,runtime instruct encodes Java_To_Runtime without clear_avx; if it uses this offset it is over-counted - confirm
+ return offset;
+}
// Indicate if the safepoint node needs the polling page as an input,
// it does if the polling page is more than disp32 away.
@@ -434,6 +443,7 @@
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const
{
+ current_offset += clear_avx_size(); // skip vzeroupper, encoded ahead of the call when present
current_offset += 1; // skip call opcode byte
return round_to(current_offset, alignment_required()) - current_offset;
}
@@ -443,6 +453,7 @@
int CallStaticJavaHandleNode::compute_padding(int current_offset) const
{
current_offset += preserve_SP_size(); // skip mov rbp, rsp
+ current_offset += clear_avx_size(); // skip vzeroupper (encoded before preserve_SP; order does not matter for the sum)
current_offset += 1; // skip call opcode byte
return round_to(current_offset, alignment_required()) - current_offset;
}
@@ -451,6 +462,7 @@
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
{
+ current_offset += clear_avx_size(); // skip vzeroupper, encoded ahead of the movq when present
current_offset += 11; // skip movq instruction + call opcode byte
return round_to(current_offset, alignment_required()) - current_offset;
}
@@ -764,6 +776,11 @@
void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
Compile* C = ra_->C;
+ if (C->max_vector_size() > 16) {
+ st->print("vzeroupper");
+ st->cr(); st->print("\t");
+ }
+
int framesize = C->frame_slots() << LogBytesPerInt;
assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
// Remove word for return adr already pushed
@@ -793,6 +810,13 @@
void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
Compile* C = ra_->C;
+ if (C->max_vector_size() > 16) {
+ // Clear upper bits of YMM registers when current compiled code uses
+ // wide vectors to avoid AVX <-> SSE transition penalty during call.
+ MacroAssembler _masm(&cbuf);
+ __ vzeroupper();
+ }
+
int framesize = C->frame_slots() << LogBytesPerInt;
assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
// Remove word for return adr already pushed
@@ -2008,6 +2032,25 @@
__ bind(miss);
%}
+ enc_class clear_avx %{
+ debug_only(int off0 = cbuf.insts_size()); // debug builds: code size before the optional vzeroupper
+ if (ra_->C->max_vector_size() > 16) {
+ // Clear upper bits of YMM registers when the current compiled code uses
+ // wide vectors, to avoid the AVX <-> SSE transition penalty during the call.
+ MacroAssembler _masm(&cbuf);
+ __ vzeroupper();
+ }
+ debug_only(int off1 = cbuf.insts_size()); // debug builds: code size after the optional vzeroupper
+ assert(off1 - off0 == clear_avx_size(), "correct size prediction"); // emitted bytes must equal what clear_avx_size() predicts
+ %}
+
+ enc_class Java_To_Runtime(method meth) %{
+ // No relocation needed: the target address is loaded into r10 as a 64-bit immediate and called through the register
+ MacroAssembler _masm(&cbuf);
+ __ mov64(r10, (int64_t) $meth$$method);
+ __ call(r10);
+ %}
+
enc_class Java_To_Interpreter(method meth)
%{
// CALL Java_To_Interpreter
@@ -11366,7 +11409,7 @@
ins_cost(300);
format %{ "call,static " %}
opcode(0xE8); /* E8 cd */
- ins_encode(Java_Static_Call(meth), call_epilog);
+ ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
ins_pipe(pipe_slow);
ins_alignment(4);
%}
@@ -11384,7 +11427,7 @@
ins_cost(300);
format %{ "call,static/MethodHandle " %}
opcode(0xE8); /* E8 cd */
- ins_encode(preserve_SP,
+ ins_encode(clear_avx, preserve_SP,
Java_Static_Call(meth),
restore_SP,
call_epilog);
@@ -11403,7 +11446,7 @@
ins_cost(300);
format %{ "movq rax, #Universe::non_oop_word()\n\t"
"call,dynamic " %}
- ins_encode(Java_Dynamic_Call(meth), call_epilog);
+ ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
ins_pipe(pipe_slow);
ins_alignment(4);
%}
@@ -11416,8 +11459,7 @@
ins_cost(300);
format %{ "call,runtime " %}
- opcode(0xE8); /* E8 cd */
- ins_encode(Java_To_Runtime(meth));
+ ins_encode(clear_avx, Java_To_Runtime(meth));
ins_pipe(pipe_slow);
%}
@@ -11429,8 +11471,7 @@
ins_cost(300);
format %{ "call_leaf,runtime " %}
- opcode(0xE8); /* E8 cd */
- ins_encode(Java_To_Runtime(meth));
+ ins_encode(clear_avx, Java_To_Runtime(meth));
ins_pipe(pipe_slow);
%}
@@ -11442,7 +11483,6 @@
ins_cost(300);
format %{ "call_leaf_nofp,runtime " %}
- opcode(0xE8); /* E8 cd */
ins_encode(Java_To_Runtime(meth));
ins_pipe(pipe_slow);
%}