8145717: Use AVX3 instructions for Arrays.equals() intrinsic
authorkvn
Fri, 18 Dec 2015 15:29:43 -0800
changeset 35136 f3788128ff3f
parent 35135 dd2ce9021031
child 35137 c8f8f6de68a7
8145717: Use AVX3 instructions for Arrays.equals() intrinsic Reviewed-by: kvn Contributed-by: jan.civlin@intel.com
hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp	Fri Dec 18 12:39:02 2015 -0800
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp	Fri Dec 18 15:29:43 2015 -0800
@@ -8579,13 +8579,53 @@
     // Compare 32-byte vectors
     andl(result, 0x0000001f);  //   tail count (in bytes)
     andl(limit, 0xffffffe0);   // vector count (in bytes)
-    jccb(Assembler::zero, COMPARE_TAIL);
+    jcc(Assembler::zero, COMPARE_TAIL);
 
     lea(ary1, Address(ary1, limit, Address::times_1));
     lea(ary2, Address(ary2, limit, Address::times_1));
     negptr(limit);
 
     bind(COMPARE_WIDE_VECTORS);
+
+#ifdef _LP64
+    if (VM_Version::supports_avx512vlbw()) { // trying 64 bytes fast loop
+      Label COMPARE_WIDE_VECTORS_LOOP_AVX2, COMPARE_WIDE_VECTORS_LOOP_AVX3;
+
+      cmpl(limit, -64);
+      jccb(Assembler::greater, COMPARE_WIDE_VECTORS_LOOP_AVX2);
+
+      bind(COMPARE_WIDE_VECTORS_LOOP_AVX3); // the hottest loop
+
+      evmovdquq(vec1, Address(ary1, limit, Address::times_1), Assembler::AVX_512bit);
+      evpcmpeqb(k7, vec1, Address(ary2, limit, Address::times_1), Assembler::AVX_512bit);
+      kortestql(k7, k7);
+      jcc(Assembler::aboveEqual, FALSE_LABEL);     // miscompare
+      addptr(limit, 64);  // update since we already compared at this addr
+      cmpl(limit, -64);
+      jccb(Assembler::lessEqual, COMPARE_WIDE_VECTORS_LOOP_AVX3);
+
+      // At this point we may still need to compare -limit+result bytes.
+      // We could execute the next two instruction and just continue via non-wide path:
+      //  cmpl(limit, 0);
+      //  jcc(Assembler::equal, COMPARE_TAIL);  // true
+      // But since we stopped at the points ary{1,2}+limit which are
+      // not farther than 64 bytes from the ends of arrays ary{1,2}+result
+      // (|limit| <= 32 and result < 32),
+      // we may just compare the last 64 bytes.
+      //
+      addptr(result, -64);   // it is safe, bc we just came from this area
+      evmovdquq(vec1, Address(ary1, result, Address::times_1), Assembler::AVX_512bit);
+      evpcmpeqb(k7, vec1, Address(ary2, result, Address::times_1), Assembler::AVX_512bit);
+      kortestql(k7, k7);
+      jcc(Assembler::aboveEqual, FALSE_LABEL);     // miscompare
+
+      jmp(TRUE_LABEL);
+
+      bind(COMPARE_WIDE_VECTORS_LOOP_AVX2);
+
+    }//if (VM_Version::supports_avx512vlbw())
+#endif //_LP64
+
     vmovdqu(vec1, Address(ary1, limit, Address::times_1));
     vmovdqu(vec2, Address(ary2, limit, Address::times_1));
     vpxor(vec1, vec2);