src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
changeset 54991 82fd8793ba5e
parent 54990 cbc557f166f2
child 55379 865775b86780
child 58678 9cf78a70fa4f
--- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp	Wed May 22 20:12:19 2019 +0300
+++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp	Wed May 22 20:39:04 2019 +0300
@@ -4035,14 +4035,14 @@
         : "compare_long_string_different_encoding UL");
     address entry = __ pc();
     Label SMALL_LOOP, TAIL, TAIL_LOAD_16, LOAD_LAST, DIFF1, DIFF2,
-        DONE, CALCULATE_DIFFERENCE, LARGE_LOOP_PREFETCH, SMALL_LOOP_ENTER,
+        DONE, CALCULATE_DIFFERENCE, LARGE_LOOP_PREFETCH, NO_PREFETCH,
         LARGE_LOOP_PREFETCH_REPEAT1, LARGE_LOOP_PREFETCH_REPEAT2;
     Register result = r0, str1 = r1, cnt1 = r2, str2 = r3, cnt2 = r4,
         tmp1 = r10, tmp2 = r11, tmp3 = r12, tmp4 = r14;
     FloatRegister vtmpZ = v0, vtmp = v1, vtmp3 = v2;
     RegSet spilled_regs = RegSet::of(tmp3, tmp4);
 
-    int prefetchLoopExitCondition = MAX(32, SoftwarePrefetchHintDistance/2);
+    int prefetchLoopExitCondition = MAX(64, SoftwarePrefetchHintDistance/2);
 
     __ eor(vtmpZ, __ T16B, vtmpZ, vtmpZ);
     // cnt2 == amount of characters left to compare
@@ -4069,7 +4069,7 @@
 
     if (SoftwarePrefetchHintDistance >= 0) {
       __ subs(rscratch2, cnt2, prefetchLoopExitCondition);
-      __ br(__ LT, SMALL_LOOP);
+      __ br(__ LT, NO_PREFETCH);
       __ bind(LARGE_LOOP_PREFETCH);
         __ prfm(Address(tmp2, SoftwarePrefetchHintDistance));
         __ mov(tmp4, 2);
@@ -4089,51 +4089,20 @@
           __ br(__ GE, LARGE_LOOP_PREFETCH);
     }
     __ cbz(cnt2, LOAD_LAST); // no characters left except last load
+    __ bind(NO_PREFETCH);
     __ subs(cnt2, cnt2, 16);
     __ br(__ LT, TAIL);
-    __ b(SMALL_LOOP_ENTER);
     __ bind(SMALL_LOOP); // smaller loop
       __ subs(cnt2, cnt2, 16);
-    __ bind(SMALL_LOOP_ENTER);
       compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
       __ br(__ GE, SMALL_LOOP);
-      __ cbz(cnt2, LOAD_LAST);
-    __ bind(TAIL); // 1..15 characters left
-      __ subs(zr, cnt2, -8);
-      __ br(__ GT, TAIL_LOAD_16);
-      __ ldrd(vtmp, Address(tmp2));
-      __ zip1(vtmp3, __ T8B, vtmp, vtmpZ);
-
-      __ ldr(tmpU, Address(__ post(cnt1, 8)));
-      __ fmovd(tmpL, vtmp3);
-      __ eor(rscratch2, tmp3, tmpL);
-      __ cbnz(rscratch2, DIFF2);
-      __ umov(tmpL, vtmp3, __ D, 1);
-      __ eor(rscratch2, tmpU, tmpL);
-      __ cbnz(rscratch2, DIFF1);
-      __ b(LOAD_LAST);
-    __ bind(TAIL_LOAD_16);
-      __ ldrq(vtmp, Address(tmp2));
-      __ ldr(tmpU, Address(__ post(cnt1, 8)));
-      __ zip1(vtmp3, __ T16B, vtmp, vtmpZ);
-      __ zip2(vtmp, __ T16B, vtmp, vtmpZ);
-      __ fmovd(tmpL, vtmp3);
-      __ eor(rscratch2, tmp3, tmpL);
-      __ cbnz(rscratch2, DIFF2);
-
-      __ ldr(tmp3, Address(__ post(cnt1, 8)));
-      __ umov(tmpL, vtmp3, __ D, 1);
-      __ eor(rscratch2, tmpU, tmpL);
-      __ cbnz(rscratch2, DIFF1);
-
-      __ ldr(tmpU, Address(__ post(cnt1, 8)));
-      __ fmovd(tmpL, vtmp);
-      __ eor(rscratch2, tmp3, tmpL);
-      __ cbnz(rscratch2, DIFF2);
-
-      __ umov(tmpL, vtmp, __ D, 1);
-      __ eor(rscratch2, tmpU, tmpL);
-      __ cbnz(rscratch2, DIFF1);
+      __ cmn(cnt2, (u1)16);
+      __ br(__ EQ, LOAD_LAST);
+    __ bind(TAIL); // 1..15 characters left until last load (last 4 characters)
+      __ add(cnt1, cnt1, cnt2, __ LSL, 1); // Address of 8 bytes before last 4 characters in UTF-16 string
+      __ add(tmp2, tmp2, cnt2); // Address of 16 bytes before last 4 characters in Latin1 string
+      __ ldr(tmp3, Address(cnt1, -8));
+      compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2); // last 16 characters before last load
       __ b(LOAD_LAST);
     __ bind(DIFF2);
       __ mov(tmpU, tmp3);
@@ -4141,10 +4110,12 @@
       __ pop(spilled_regs, sp);
       __ b(CALCULATE_DIFFERENCE);
     __ bind(LOAD_LAST);
+      // Last 4 UTF-16 characters are already pre-loaded into tmp3 by compare_string_16_x_LU.
+      // No need to load it again
+      __ mov(tmpU, tmp3);
       __ pop(spilled_regs, sp);
 
       __ ldrs(vtmp, Address(strL));
-      __ ldr(tmpU, Address(strU));
       __ zip1(vtmp, __ T8B, vtmp, vtmpZ);
       __ fmovd(tmpL, vtmp);
 
@@ -4206,10 +4177,10 @@
         compare_string_16_bytes_same(DIFF, DIFF2);
         __ br(__ GT, LARGE_LOOP_PREFETCH);
         __ cbz(cnt2, LAST_CHECK_AND_LENGTH_DIFF); // no more chars left?
-        // less than 16 bytes left?
-        __ subs(cnt2, cnt2, isLL ? 16 : 8);
-        __ br(__ LT, TAIL);
     }
+    // less than 16 bytes left?
+    __ subs(cnt2, cnt2, isLL ? 16 : 8);
+    __ br(__ LT, TAIL);
     __ bind(SMALL_LOOP);
       compare_string_16_bytes_same(DIFF, DIFF2);
       __ subs(cnt2, cnt2, isLL ? 16 : 8);