8171974: Fix for R10 Register clobbering with usage of ExternalAddress
Reviewed-by: kvn, rbackman
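The problem in one line: on x86_64, MacroAssembler helpers that take an AddressLiteral fall back to materializing the address with lea(rscratch1, src) when it is not RIP-reachable, and rscratch1 is r10, so any stub holding live state in r10 across such a call gets that state clobbered. The SHA-256/SHA-512 stubs keep the working variable g in r10; the AES counter-mode stub used r10 for len_reg on Win64. A minimal sketch of the failure pattern, assuming HotSpot's x86_64 MacroAssembler (register roles mirror the SHA-256 stub; illustrative, not a literal excerpt):

    // g aliases r10 in the SHA-256 stub
    movl(g, Address(CTX, 4*6));   // g (r10) now holds live state
    // When the literal is not RIP-reachable the helper expands to
    //   lea(rscratch1, src);                   // rscratch1 == r10
    //   vmovdqu(dst, Address(rscratch1, 0));   // ...so g was just destroyed
    vmovdqu(SHUF_00BA, ExternalAddress(pshuffle_byte_flip_mask_addr + 32));

The fix below combines three tactics: give movdqu an explicit scratch-register parameter, defer loading g until after the last scratch use, and hoist a loop-invariant mask into a spare YMM register so the inner loop needs no address materialization at all.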
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Tue Jan 03 21:36:05 2017 +0100
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Tue Jan 03 14:56:51 2017 -0800
@@ -3499,12 +3499,12 @@
}
}
-void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src) {
+void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg) {
if (reachable(src)) {
movdqu(dst, as_Address(src));
} else {
- lea(rscratch1, src);
- movdqu(dst, Address(rscratch1, 0));
+ lea(scratchReg, src);
+ movdqu(dst, Address(scratchReg, 0));
}
}
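With the new parameter, a call site that has live data in rscratch1 can name a different scratch register; the default argument leaves every existing caller unchanged. A hypothetical call site (rbx assumed dead here):

    __ movdqu(xmm0, ExternalAddress(mask_addr), rbx); // lea goes through rbx, r10 stays intact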
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp Tue Jan 03 21:36:05 2017 +0100
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp Tue Jan 03 14:56:51 2017 -0800
@@ -1085,7 +1085,7 @@
void movdqu(Address dst, XMMRegister src);
void movdqu(XMMRegister dst, Address src);
void movdqu(XMMRegister dst, XMMRegister src);
- void movdqu(XMMRegister dst, AddressLiteral src);
+ void movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg = rscratch1);
// AVX Unaligned forms
void vmovdqu(Address dst, XMMRegister src);
void vmovdqu(XMMRegister dst, Address src);
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86_sha.cpp Tue Jan 03 21:36:05 2017 +0100
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86_sha.cpp Tue Jan 03 14:56:51 2017 -0800
@@ -817,7 +817,7 @@
movl(d, Address(CTX, 4*3));
movl(e, Address(CTX, 4*4));
movl(f, Address(CTX, 4*5));
- movl(g, Address(CTX, 4*6));
+ // g (r10) is loaded below, after its last use as a scratch register
movl(h, Address(CTX, 4*7));
pshuffle_byte_flip_mask_addr = pshuffle_byte_flip_mask;
@@ -825,6 +825,8 @@
vmovdqu(SHUF_00BA, ExternalAddress(pshuffle_byte_flip_mask_addr + 32)); //[_SHUF_00BA wrt rip]
vmovdqu(SHUF_DC00, ExternalAddress(pshuffle_byte_flip_mask_addr + 64)); //[_SHUF_DC00 wrt rip]
+ movl(g, Address(CTX, 4*6));
+
movq(Address(rsp, _CTX), CTX); // store
bind(loop0);
@@ -977,7 +979,7 @@
movl(d, Address(CTX, 4*3)); // 0xa54ff53a
movl(e, Address(CTX, 4*4)); // 0x510e527f
movl(f, Address(CTX, 4*5)); // 0x9b05688c
- movl(g, Address(CTX, 4*6)); // 0x1f83d9ab
+ // g (r10) is loaded below, after its last use as a scratch register
movl(h, Address(CTX, 4*7)); // 0x5be0cd19
@@ -986,6 +988,8 @@
vmovdqu(SHUF_00BA, ExternalAddress(pshuffle_byte_flip_mask_addr + 32)); //[_SHUF_00BA wrt rip]
vmovdqu(SHUF_DC00, ExternalAddress(pshuffle_byte_flip_mask_addr + 64)); //[_SHUF_DC00 wrt rip]
+ movl(g, Address(CTX, 4*6)); // 0x1f83d9ab
+
movq(Address(rsp, _CTX), CTX);
jmpb(do_last_block);
@@ -1154,9 +1158,8 @@
// Move to appropriate lanes for calculating w[16] and w[17]
vperm2f128(xmm4, xmm0, xmm0, 0); //xmm4 = W[-16] + W[-7] + s0{ BABA }
- address MASK_YMM_LO = StubRoutines::x86::pshuffle_byte_flip_mask_addr_sha512();
//Move to appropriate lanes for calculating w[18] and w[19]
- vpand(xmm0, xmm0, ExternalAddress(MASK_YMM_LO + 32), AVX_256bit); //xmm0 = W[-16] + W[-7] + s0{ DC00 }
+ vpand(xmm0, xmm0, xmm10, AVX_256bit); //xmm0 = W[-16] + W[-7] + s0{ DC00 }
//Calculate w[16] and w[17] in both 128 bit lanes
//Calculate sigma1 for w[16] and w[17] on both 128 bit lanes
vperm2f128(xmm2, xmm7, xmm7, 17); //xmm2 = W[-2] {BABA}
@@ -1250,6 +1253,7 @@
const XMMRegister& XFER = xmm0; // YTMP0
const XMMRegister& BYTE_FLIP_MASK = xmm9; // ymm9
+ const XMMRegister& YMM_MASK_LO = xmm10; // ymm10
#ifdef _WIN64
const Register& INP = rcx; //1st arg
const Register& CTX = rdx; //2nd arg
@@ -1368,11 +1372,14 @@
movq(d, Address(CTX, 8 * 3));
movq(e, Address(CTX, 8 * 4));
movq(f, Address(CTX, 8 * 5));
- movq(g, Address(CTX, 8 * 6));
+ // g (r10) is loaded below, after its last use as a scratch register
movq(h, Address(CTX, 8 * 7));
pshuffle_byte_flip_mask_addr = pshuffle_byte_flip_mask_sha512;
vmovdqu(BYTE_FLIP_MASK, ExternalAddress(pshuffle_byte_flip_mask_addr + 0)); //PSHUFFLE_BYTE_FLIP_MASK wrt rip
+ vmovdqu(YMM_MASK_LO, ExternalAddress(pshuffle_byte_flip_mask_addr + 32));
+
+ movq(g, Address(CTX, 8 * 6));
bind(loop0);
lea(TBL, ExternalAddress(K512_W));
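For SHA-512 the loop body takes a different route: instead of re-materializing the MASK_YMM_LO constant through an ExternalAddress on every iteration (which would need r10 as scratch while r10 holds g), the mask is loaded once into ymm10 in the prologue, before g goes live, and vpand switches to its register form. The resulting shape, roughly (a sketch, not a literal excerpt):

    // prologue: materialize the mask while r10 is still free
    vmovdqu(YMM_MASK_LO, ExternalAddress(pshuffle_byte_flip_mask_addr + 32));
    movq(g, Address(CTX, 8 * 6));               // now r10 (g) can go live
    // loop body: register operand, no lea(rscratch1, ...) required
    vpand(xmm0, xmm0, YMM_MASK_LO, AVX_256bit);

Dropping the per-iteration memory operand is a small side benefit on top of the correctness fix.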
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp Tue Jan 03 21:36:05 2017 +0100
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp Tue Jan 03 14:56:51 2017 -0800
@@ -3207,7 +3207,7 @@
const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16)
#else
const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64
- const Register len_reg = r10; // pick the first volatile windows register
+ const Register len_reg = r11; // pick a volatile windows register; r10 is reserved as rscratch1
#endif
const Register pos = rax;
@@ -3404,7 +3404,7 @@
const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16)
#else
const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64
- const Register len_reg = r10; // pick the first volatile windows register
+ const Register len_reg = r11; // pick a volatile windows register; r10 is reserved as rscratch1
#endif
const Register pos = rax;
@@ -3930,7 +3930,7 @@
__ push(rbx); // Save RBX
__ movdqu(xmm_curr_counter, Address(counter, 0x00)); // initialize counter with initial counter
- __ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr()));
+ __ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr()), pos); // pos (rax) as scratch
__ pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled
__ movptr(pos, 0);
@@ -3953,7 +3953,7 @@
__ movl(Address(used_addr, 0), used);
// key length could be only {11, 13, 15} * 4 = {44, 52, 60}
- __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+ __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()), rbx); // rbx as scratch; rbx is reloaded just below
__ movl(rbx, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
__ cmpl(rbx, 52);
__ jcc(Assembler::equal, L_multiBlock_loopTop[1]);
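In the Win64 counter-mode stub both ideas meet: r10 and r11 are equally cheap homes for len_reg since both are caller-saved in the Win64 ABI, but r11 keeps clear of rscratch1; and the two remaining ExternalAddress loads name explicit scratch registers that are dead at that point (pos is assigned only after the first load, rbx is reloaded right after the second). Condensed view of the fixed pattern (illustrative, names as in the stub above):

    const Register len_reg = r11;   // volatile on Win64; r10 remains rscratch1
    __ movdqu(xmm_counter_shuf_mask,
              ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr()),
              pos);                 // safe: pos (rax) is initialized afterwards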