hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp
changeset 30624 2e1803c8a26d
parent 30310 522ea430079f
child 30751 deee942af125
child 30768 66b53dcce510
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp	Thu May 07 15:34:45 2015 -0700
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp	Fri May 08 11:49:20 2015 -0700
@@ -3996,21 +3996,21 @@
   }
 }
 
-void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
+void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
   if (reachable(src)) {
-    vandpd(dst, nds, as_Address(src), vector256);
+    vandpd(dst, nds, as_Address(src), vector_len);
   } else {
     lea(rscratch1, src);
-    vandpd(dst, nds, Address(rscratch1, 0), vector256);
-  }
-}
-
-void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
+    vandpd(dst, nds, Address(rscratch1, 0), vector_len);
+  }
+}
+
+void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
   if (reachable(src)) {
-    vandps(dst, nds, as_Address(src), vector256);
+    vandps(dst, nds, as_Address(src), vector_len);
   } else {
     lea(rscratch1, src);
-    vandps(dst, nds, Address(rscratch1, 0), vector256);
+    vandps(dst, nds, Address(rscratch1, 0), vector_len);
   }
 }
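Note on this hunk and the next: the bool vector256 flag could only distinguish
128-bit from 256-bit operands, which no longer suffices once AVX-512 adds
512-bit registers, so int vector_len carries a width selector instead. Judging
from the Assembler constants used further down in this changeset, the mapping
appears to be:

    // Sketch of the vector_len encoding, inferred from the constants used
    // later in this changeset (Assembler::AVX_256bit etc.):
    //   AVX_128bit = 0   // xmm, 128-bit op (old vector256 == false)
    //   AVX_256bit = 1   // ymm, 256-bit op (old vector256 == true)
    //   AVX_512bit = 2   // zmm, 512-bit op (new with AVX-512/EVEX)

so the mechanical translation at existing call sites is false -> 0, true -> 1.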
 
@@ -4068,21 +4068,21 @@
   }
 }
 
-void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
+void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
   if (reachable(src)) {
-    vxorpd(dst, nds, as_Address(src), vector256);
+    vxorpd(dst, nds, as_Address(src), vector_len);
   } else {
     lea(rscratch1, src);
-    vxorpd(dst, nds, Address(rscratch1, 0), vector256);
-  }
-}
-
-void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
+    vxorpd(dst, nds, Address(rscratch1, 0), vector_len);
+  }
+}
+
+void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
   if (reachable(src)) {
-    vxorps(dst, nds, as_Address(src), vector256);
+    vxorps(dst, nds, as_Address(src), vector_len);
   } else {
     lea(rscratch1, src);
-    vxorps(dst, nds, Address(rscratch1, 0), vector256);
+    vxorps(dst, nds, Address(rscratch1, 0), vector_len);
   }
 }
 
@@ -4561,6 +4561,14 @@
     movflt(Address(rsp,off++*sizeof(jdouble)),xmm6);
     movflt(Address(rsp,off++*sizeof(jdouble)),xmm7);
   } else if (UseSSE >= 2)  {
+    if (UseAVX > 2) {
+      movl(rbx, 0xffff);
+#ifdef _LP64
+      kmovql(k1, rbx);
+#else
+      kmovdl(k1, rbx);
+#endif
+    }
 #ifdef COMPILER2
     if (MaxVectorSize > 16) {
       assert(UseAVX > 0, "256bit vectors are supported only with AVX");
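Note on the k1 setup added above: EVEX-encoded AVX-512 instructions take an
opmask register, and this port appears to use k1 as the default mask for
otherwise unmasked operations. Loading 0xffff sets one mask bit per 32-bit
lane of a 512-bit register (16 lanes), i.e. all lanes enabled; kmovql is used
on 64-bit builds and kmovdl on 32-bit builds because the source GPR differs
in width. A minimal sketch of the idiom, with an invented helper name:

    // Reader's sketch, not part of the changeset; the helper name is
    // hypothetical.
    static void init_avx512_allones_mask(MacroAssembler* masm, Register tmp) {
      masm->movl(tmp, 0xffff);  // 16 set bits: one per dword lane of a zmm
    #ifdef _LP64
      masm->kmovql(k1, tmp);    // 64-bit GPR -> opmask register k1
    #else
      masm->kmovdl(k1, tmp);    // 32-bit GPR -> opmask register k1
    #endif
    }

The same sequence reappears in the fill stub below; a stub cannot assume any
earlier code has already initialized k1.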
@@ -7063,8 +7071,39 @@
     {
       assert( UseSSE >= 2, "supported cpu only" );
       Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
+      if (UseAVX > 2) {
+        movl(rtmp, 0xffff);
+#ifdef _LP64
+        kmovql(k1, rtmp);
+#else
+        kmovdl(k1, rtmp);
+#endif
+      }
       movdl(xtmp, value);
-      if (UseAVX >= 2 && UseUnalignedLoadStores) {
+      if (UseAVX > 2 && UseUnalignedLoadStores) {
+        // Fill 64-byte chunks
+        Label L_fill_64_bytes_loop, L_check_fill_32_bytes;
+        evpbroadcastd(xtmp, xtmp, Assembler::AVX_512bit);
+
+        subl(count, 16 << shift);
+        jcc(Assembler::less, L_check_fill_32_bytes);
+        align(16);
+
+        BIND(L_fill_64_bytes_loop);
+        evmovdqu(Address(to, 0), xtmp, Assembler::AVX_512bit);
+        addptr(to, 64);
+        subl(count, 16 << shift);
+        jcc(Assembler::greaterEqual, L_fill_64_bytes_loop);
+
+        BIND(L_check_fill_32_bytes);
+        addl(count, 8 << shift);
+        jccb(Assembler::less, L_check_fill_8_bytes);
+        evmovdqu(Address(to, 0), xtmp, Assembler::AVX_256bit);
+        addptr(to, 32);
+        subl(count, 8 << shift);
+
+        BIND(L_check_fill_8_bytes);
+      } else if (UseAVX == 2 && UseUnalignedLoadStores) {
         // Fill 64-byte chunks
         Label L_fill_64_bytes_loop, L_check_fill_32_bytes;
         vpbroadcastd(xtmp, xtmp);
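Note on the new UseAVX > 2 branch above: count is in elements, and shift is
scaled so that 8 << shift elements always equal 32 bytes (hence 16 << shift
per 64-byte zmm store, twice the pre-existing 32-byte path). The control flow,
written out as C, is a reader's sketch of the emitted code, with
store_64_bytes/store_32_bytes standing in for the evmovdqu stores:

    count -= 16 << shift;            // subl + jcc(less, L_check_fill_32_bytes)
    while (count >= 0) {             // L_fill_64_bytes_loop
      store_64_bytes(to, xtmp);      //   evmovdqu, Assembler::AVX_512bit
      to += 64;
      count -= 16 << shift;          //   jcc(greaterEqual, loop)
    }
    count += 8 << shift;             // L_check_fill_32_bytes
    if (count >= 0) {                // jccb(less, L_check_fill_8_bytes)
      store_32_bytes(to, xtmp);      //   evmovdqu, Assembler::AVX_256bit
      to += 32;
      count -= 8 << shift;
    }
    // falls through to L_check_fill_8_bytes with count == remaining elements
    // minus (8 << shift), the invariant the pre-existing 8-byte tail expects.

The 32-byte tail store also uses evmovdqu rather than vmovdqu: on this branch
the CPU is known to support EVEX encodings.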
@@ -7200,11 +7239,11 @@
       bind(L_copy_32_chars);
       vmovdqu(tmp3Reg, Address(src, len, Address::times_2, -64));
       vmovdqu(tmp4Reg, Address(src, len, Address::times_2, -32));
-      vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector256 */ true);
+      vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector_len */ 1);
      vptest(tmp2Reg, tmp1Reg);       // check for Unicode chars in vector
       jccb(Assembler::notZero, L_copy_32_chars_exit);
-      vpackuswb(tmp3Reg, tmp3Reg, tmp4Reg, /* vector256 */ true);
-      vpermq(tmp4Reg, tmp3Reg, 0xD8, /* vector256 */ true);
+      vpackuswb(tmp3Reg, tmp3Reg, tmp4Reg, /* vector_len */ 1);
+      vpermq(tmp4Reg, tmp3Reg, 0xD8, /* vector_len */ 1);
       vmovdqu(Address(dst, len, Address::times_1, -32), tmp4Reg);
 
       bind(L_chars_32_check);
@@ -7227,13 +7266,13 @@
       vmovdqu(tmp2Reg, Address(src, len, Address::times_2, -32));
       vptest(tmp2Reg, tmp1Reg);
       jccb(Assembler::notZero, L_copy_16_chars_exit);
-      vpackuswb(tmp2Reg, tmp2Reg, tmp1Reg, /* vector256 */ true);
-      vpermq(tmp3Reg, tmp2Reg, 0xD8, /* vector256 */ true);
+      vpackuswb(tmp2Reg, tmp2Reg, tmp1Reg, /* vector_len */ 1);
+      vpermq(tmp3Reg, tmp2Reg, 0xD8, /* vector_len */ 1);
     } else {
       if (UseAVX > 0) {
         movdqu(tmp3Reg, Address(src, len, Address::times_2, -32));
         movdqu(tmp4Reg, Address(src, len, Address::times_2, -16));
-        vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector256 */ false);
+        vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector_len */ 0);
       } else {
         movdqu(tmp3Reg, Address(src, len, Address::times_2, -32));
         por(tmp2Reg, tmp3Reg);
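Two details in this hunk are worth spelling out. First, the vector_len
literals follow the mapping sketched earlier: 1 is 256-bit (the old
vector256 == true) and 0 is 128-bit. Second, the pack-then-permute pair in
the 256-bit path is not arbitrary:

    // vpackuswb on a ymm packs words to bytes within each 128-bit lane, so
    // the 32 result bytes come out lane-interleaved.  vpermq with imm8 0xD8
    // restores sequential order:
    //   0xD8 = 0b11'01'10'00  ->  dst qwords = src qwords [0, 2, 1, 3]
    // which places the two low-half results next to each other before the
    // 32-byte store.

The vptest against tmp1Reg (which appears to hold the high-byte mask, per the
"check for Unicode chars" comment above) bails out of the vector path if any
char does not fit in one byte.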
@@ -7776,7 +7815,7 @@
   if (UseAVX > 0) {
    vpclmulhdq(xtmp, xK, xcrc); // [127:64]
     vpclmulldq(xcrc, xK, xcrc); // [63:0]
-    vpxor(xcrc, xcrc, Address(buf, offset), false /* vector256 */);
+    vpxor(xcrc, xcrc, Address(buf, offset), 0 /* vector_len */);
     pxor(xcrc, xtmp);
   } else {
     movdqa(xtmp, xcrc);
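Note on this hunk: it is one fold-by-128-bits step of the CRC32 kernel.
vpclmulhdq/vpclmulldq are MacroAssembler shorthands for vpclmulqdq with
immediates selecting the high and low 64-bit halves (hence the [127:64] and
[63:0] comments), and carry-less multiplication here is a 128-bit operation,
so the translated flag is 0 (AVX_128bit). As pseudocode, under those
assumptions:

    xtmp = clmul_high64(xK, xcrc);   // contribution to bits [127:64]
    xcrc = clmul_low64(xK, xcrc);    // contribution to bits [63:0]
    xcrc ^= buf[offset];             // fold in the next 16 bytes of input
    xcrc ^= xtmp;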
@@ -7920,7 +7959,7 @@
   movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr()));
   if (UseAVX > 0) {
     vpclmulqdq(xmm2, xmm0, xmm1, 0x1);
-    vpand(xmm3, xmm0, xmm2, false /* vector256 */);
+    vpand(xmm3, xmm0, xmm2, 0 /* vector_len */);
     vpclmulqdq(xmm0, xmm0, xmm3, 0x1);
   } else {
     movdqa(xmm2, xmm0);
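As in the folding step above, this final reduction is pure 128-bit work, so
the mechanical translation is again false -> 0 (AVX_128bit). The shape of the
AVX path, as a reader's sketch (the exact qword selection by the 0x1
immediates is omitted):

    t   = clmul(masks, crc);   // vpclmulqdq(xmm2, xmm0, xmm1, 0x1)
    t  &= masks;               // vpand(xmm3, xmm0, xmm2, 0)
    crc = clmul(masks, t);     // vpclmulqdq(xmm0, xmm0, xmm3, 0x1)

where masks is the crc_by128_masks constant table loaded just above.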