# HG changeset patch # User vdeshpande # Date 1483484211 28800 # Node ID bcaab17f72a51c9fcc655e4cd33b09e1c790189d # Parent d4693bf78777bcddf2f724c5592549dc715e55ab 8171974: Fix for R10 Register clobbering with usage of ExternalAddress Reviewed-by: kvn, rbackman diff -r d4693bf78777 -r bcaab17f72a5 hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp --- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Tue Jan 03 21:36:05 2017 +0100 +++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Tue Jan 03 14:56:51 2017 -0800 @@ -3499,12 +3499,12 @@ } } -void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src) { +void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg) { if (reachable(src)) { movdqu(dst, as_Address(src)); } else { - lea(rscratch1, src); - movdqu(dst, Address(rscratch1, 0)); + lea(scratchReg, src); + movdqu(dst, Address(scratchReg, 0)); } } diff -r d4693bf78777 -r bcaab17f72a5 hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp --- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp Tue Jan 03 21:36:05 2017 +0100 +++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp Tue Jan 03 14:56:51 2017 -0800 @@ -1085,7 +1085,7 @@ void movdqu(Address dst, XMMRegister src); void movdqu(XMMRegister dst, Address src); void movdqu(XMMRegister dst, XMMRegister src); - void movdqu(XMMRegister dst, AddressLiteral src); + void movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg = rscratch1); // AVX Unaligned forms void vmovdqu(Address dst, XMMRegister src); void vmovdqu(XMMRegister dst, Address src); diff -r d4693bf78777 -r bcaab17f72a5 hotspot/src/cpu/x86/vm/macroAssembler_x86_sha.cpp --- a/hotspot/src/cpu/x86/vm/macroAssembler_x86_sha.cpp Tue Jan 03 21:36:05 2017 +0100 +++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86_sha.cpp Tue Jan 03 14:56:51 2017 -0800 @@ -817,7 +817,7 @@ movl(d, Address(CTX, 4*3)); movl(e, Address(CTX, 4*4)); movl(f, Address(CTX, 4*5)); - movl(g, Address(CTX, 4*6)); + // load g - r10 after it is used as scratch movl(h, Address(CTX, 4*7)); pshuffle_byte_flip_mask_addr = pshuffle_byte_flip_mask; @@ -825,6 +825,8 @@ vmovdqu(SHUF_00BA, ExternalAddress(pshuffle_byte_flip_mask_addr + 32)); //[_SHUF_00BA wrt rip] vmovdqu(SHUF_DC00, ExternalAddress(pshuffle_byte_flip_mask_addr + 64)); //[_SHUF_DC00 wrt rip] + movl(g, Address(CTX, 4*6)); + movq(Address(rsp, _CTX), CTX); // store bind(loop0); @@ -977,7 +979,7 @@ movl(d, Address(CTX, 4*3)); // 0xa54ff53a movl(e, Address(CTX, 4*4)); // 0x510e527f movl(f, Address(CTX, 4*5)); // 0x9b05688c - movl(g, Address(CTX, 4*6)); // 0x1f83d9ab + // load g - r10 after use as scratch movl(h, Address(CTX, 4*7)); // 0x5be0cd19 @@ -986,6 +988,8 @@ vmovdqu(SHUF_00BA, ExternalAddress(pshuffle_byte_flip_mask_addr + 32)); //[_SHUF_00BA wrt rip] vmovdqu(SHUF_DC00, ExternalAddress(pshuffle_byte_flip_mask_addr + 64)); //[_SHUF_DC00 wrt rip] + movl(g, Address(CTX, 4*6)); // 0x1f83d9ab + movq(Address(rsp, _CTX), CTX); jmpb(do_last_block); @@ -1154,9 +1158,8 @@ // Move to appropriate lanes for calculating w[16] and w[17] vperm2f128(xmm4, xmm0, xmm0, 0); //xmm4 = W[-16] + W[-7] + s0{ BABA } - address MASK_YMM_LO = StubRoutines::x86::pshuffle_byte_flip_mask_addr_sha512(); //Move to appropriate lanes for calculating w[18] and w[19] - vpand(xmm0, xmm0, ExternalAddress(MASK_YMM_LO + 32), AVX_256bit); //xmm0 = W[-16] + W[-7] + s0{ DC00 } + vpand(xmm0, xmm0, xmm10, AVX_256bit); //xmm0 = W[-16] + W[-7] + s0{ DC00 } //Calculate w[16] and w[17] in both 128 bit lanes //Calculate sigma1 for w[16] and w[17] on both 128 bit lanes vperm2f128(xmm2, xmm7, xmm7, 17); //xmm2 = W[-2] {BABA} @@ -1250,6 +1253,7 @@ const XMMRegister& XFER = xmm0; // YTMP0 const XMMRegister& BYTE_FLIP_MASK = xmm9; // ymm9 + const XMMRegister& YMM_MASK_LO = xmm10; // ymm10 #ifdef _WIN64 const Register& INP = rcx; //1st arg const Register& CTX = rdx; //2nd arg @@ -1368,11 +1372,14 @@ movq(d, Address(CTX, 8 * 3)); movq(e, Address(CTX, 8 * 4)); movq(f, Address(CTX, 8 * 5)); - movq(g, Address(CTX, 8 * 6)); + // load g - r10 after it is used as scratch movq(h, Address(CTX, 8 * 7)); pshuffle_byte_flip_mask_addr = pshuffle_byte_flip_mask_sha512; vmovdqu(BYTE_FLIP_MASK, ExternalAddress(pshuffle_byte_flip_mask_addr + 0)); //PSHUFFLE_BYTE_FLIP_MASK wrt rip + vmovdqu(YMM_MASK_LO, ExternalAddress(pshuffle_byte_flip_mask_addr + 32)); + + movq(g, Address(CTX, 8 * 6)); bind(loop0); lea(TBL, ExternalAddress(K512_W)); diff -r d4693bf78777 -r bcaab17f72a5 hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp --- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp Tue Jan 03 21:36:05 2017 +0100 +++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp Tue Jan 03 14:56:51 2017 -0800 @@ -3207,7 +3207,7 @@ const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16) #else const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64 - const Register len_reg = r10; // pick the first volatile windows register + const Register len_reg = r11; // pick the volatile windows register #endif const Register pos = rax; @@ -3404,7 +3404,7 @@ const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16) #else const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64 - const Register len_reg = r10; // pick the first volatile windows register + const Register len_reg = r11; // pick the volatile windows register #endif const Register pos = rax; @@ -3930,7 +3930,7 @@ __ push(rbx); // Save RBX __ movdqu(xmm_curr_counter, Address(counter, 0x00)); // initialize counter with initial counter - __ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr())); + __ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr()), pos); // pos as scratch __ pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled __ movptr(pos, 0); @@ -3953,7 +3953,7 @@ __ movl(Address(used_addr, 0), used); // key length could be only {11, 13, 15} * 4 = {44, 52, 60} - __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); + __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()), rbx); // rbx as scratch __ movl(rbx, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); __ cmpl(rbx, 52); __ jcc(Assembler::equal, L_multiBlock_loopTop[1]);