8171974: Fix for R10 Register clobbering with usage of ExternalAddress
authorvdeshpande
Tue, 03 Jan 2017 14:56:51 -0800
changeset 43423 bcaab17f72a5
parent 43422 d4693bf78777
child 43424 5c5d1360b2d3
8171974: Fix for R10 Register clobbering with usage of ExternalAddress Reviewed-by: kvn, rbackman
hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp
hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp
hotspot/src/cpu/x86/vm/macroAssembler_x86_sha.cpp
hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp	Tue Jan 03 21:36:05 2017 +0100
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp	Tue Jan 03 14:56:51 2017 -0800
@@ -3499,12 +3499,12 @@
   }
 }
 
-void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src) {
+void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg) {
   if (reachable(src)) {
     movdqu(dst, as_Address(src));
   } else {
-    lea(rscratch1, src);
-    movdqu(dst, Address(rscratch1, 0));
+    lea(scratchReg, src);
+    movdqu(dst, Address(scratchReg, 0));
   }
 }
 
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp	Tue Jan 03 21:36:05 2017 +0100
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp	Tue Jan 03 14:56:51 2017 -0800
@@ -1085,7 +1085,7 @@
   void movdqu(Address     dst, XMMRegister src);
   void movdqu(XMMRegister dst, Address src);
   void movdqu(XMMRegister dst, XMMRegister src);
-  void movdqu(XMMRegister dst, AddressLiteral src);
+  void movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg = rscratch1);
   // AVX Unaligned forms
   void vmovdqu(Address     dst, XMMRegister src);
   void vmovdqu(XMMRegister dst, Address src);
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86_sha.cpp	Tue Jan 03 21:36:05 2017 +0100
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86_sha.cpp	Tue Jan 03 14:56:51 2017 -0800
@@ -817,7 +817,7 @@
   movl(d, Address(CTX, 4*3));
   movl(e, Address(CTX, 4*4));
   movl(f, Address(CTX, 4*5));
-  movl(g, Address(CTX, 4*6));
+  // load g - r10 after it is used as scratch
   movl(h, Address(CTX, 4*7));
 
   pshuffle_byte_flip_mask_addr = pshuffle_byte_flip_mask;
@@ -825,6 +825,8 @@
   vmovdqu(SHUF_00BA, ExternalAddress(pshuffle_byte_flip_mask_addr + 32));     //[_SHUF_00BA wrt rip]
   vmovdqu(SHUF_DC00, ExternalAddress(pshuffle_byte_flip_mask_addr + 64));     //[_SHUF_DC00 wrt rip]
 
+  movl(g, Address(CTX, 4*6));
+
   movq(Address(rsp, _CTX), CTX);           // store
 
 bind(loop0);
@@ -977,7 +979,7 @@
   movl(d, Address(CTX, 4*3));   // 0xa54ff53a
   movl(e, Address(CTX, 4*4));   // 0x510e527f
   movl(f, Address(CTX, 4*5));   // 0x9b05688c
-  movl(g, Address(CTX, 4*6));   // 0x1f83d9ab
+  // load g - r10 after use as scratch
   movl(h, Address(CTX, 4*7));   // 0x5be0cd19
 
 
@@ -986,6 +988,8 @@
   vmovdqu(SHUF_00BA, ExternalAddress(pshuffle_byte_flip_mask_addr + 32));     //[_SHUF_00BA wrt rip]
   vmovdqu(SHUF_DC00, ExternalAddress(pshuffle_byte_flip_mask_addr + 64));     //[_SHUF_DC00 wrt rip]
 
+  movl(g, Address(CTX, 4*6));   // 0x1f83d9ab
+
   movq(Address(rsp, _CTX), CTX);
   jmpb(do_last_block);
 
@@ -1154,9 +1158,8 @@
       // Move to appropriate lanes for calculating w[16] and w[17]
       vperm2f128(xmm4, xmm0, xmm0, 0); //xmm4 = W[-16] + W[-7] + s0{ BABA }
 
-      address MASK_YMM_LO = StubRoutines::x86::pshuffle_byte_flip_mask_addr_sha512();
       //Move to appropriate lanes for calculating w[18] and w[19]
-      vpand(xmm0, xmm0, ExternalAddress(MASK_YMM_LO + 32), AVX_256bit); //xmm0 = W[-16] + W[-7] + s0{ DC00 }
+      vpand(xmm0, xmm0, xmm10, AVX_256bit); //xmm0 = W[-16] + W[-7] + s0{ DC00 }
       //Calculate w[16] and w[17] in both 128 bit lanes
       //Calculate sigma1 for w[16] and w[17] on both 128 bit lanes
       vperm2f128(xmm2, xmm7, xmm7, 17); //xmm2 = W[-2] {BABA}
@@ -1250,6 +1253,7 @@
 
     const XMMRegister& XFER = xmm0; // YTMP0
     const XMMRegister& BYTE_FLIP_MASK = xmm9; // ymm9
+    const XMMRegister& YMM_MASK_LO = xmm10; // ymm10
 #ifdef _WIN64
     const Register& INP = rcx; //1st arg
     const Register& CTX = rdx; //2nd arg
@@ -1368,11 +1372,14 @@
     movq(d, Address(CTX, 8 * 3));
     movq(e, Address(CTX, 8 * 4));
     movq(f, Address(CTX, 8 * 5));
-    movq(g, Address(CTX, 8 * 6));
+    // load g - r10 after it is used as scratch
     movq(h, Address(CTX, 8 * 7));
 
     pshuffle_byte_flip_mask_addr = pshuffle_byte_flip_mask_sha512;
     vmovdqu(BYTE_FLIP_MASK, ExternalAddress(pshuffle_byte_flip_mask_addr + 0)); //PSHUFFLE_BYTE_FLIP_MASK wrt rip
+    vmovdqu(YMM_MASK_LO, ExternalAddress(pshuffle_byte_flip_mask_addr + 32));
+
+    movq(g, Address(CTX, 8 * 6));
 
     bind(loop0);
     lea(TBL, ExternalAddress(K512_W));
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Tue Jan 03 21:36:05 2017 +0100
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Tue Jan 03 14:56:51 2017 -0800
@@ -3207,7 +3207,7 @@
     const Register len_reg     = c_rarg4;  // src len (must be multiple of blocksize 16)
 #else
     const Address  len_mem(rbp, 6 * wordSize);  // length is on stack on Win64
-    const Register len_reg     = r10;      // pick the first volatile windows register
+    const Register len_reg     = r11;      // pick the volatile windows register
 #endif
     const Register pos         = rax;
 
@@ -3404,7 +3404,7 @@
     const Register len_reg     = c_rarg4;  // src len (must be multiple of blocksize 16)
 #else
     const Address  len_mem(rbp, 6 * wordSize);  // length is on stack on Win64
-    const Register len_reg     = r10;      // pick the first volatile windows register
+    const Register len_reg     = r11;      // pick the volatile windows register
 #endif
     const Register pos         = rax;
 
@@ -3930,7 +3930,7 @@
 
     __ push(rbx); // Save RBX
     __ movdqu(xmm_curr_counter, Address(counter, 0x00)); // initialize counter with initial counter
-    __ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr()));
+    __ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr()), pos); // pos as scratch
     __ pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled
     __ movptr(pos, 0);
 
@@ -3953,7 +3953,7 @@
     __ movl(Address(used_addr, 0), used);
 
     // key length could be only {11, 13, 15} * 4 = {44, 52, 60}
-    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()), rbx); // rbx as scratch
     __ movl(rbx, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
     __ cmpl(rbx, 52);
     __ jcc(Assembler::equal, L_multiBlock_loopTop[1]);