--- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp Wed May 22 20:12:19 2019 +0300
+++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp Wed May 22 20:39:04 2019 +0300
@@ -4035,14 +4035,14 @@
: "compare_long_string_different_encoding UL");
address entry = __ pc();
Label SMALL_LOOP, TAIL, TAIL_LOAD_16, LOAD_LAST, DIFF1, DIFF2,
- DONE, CALCULATE_DIFFERENCE, LARGE_LOOP_PREFETCH, SMALL_LOOP_ENTER,
+ DONE, CALCULATE_DIFFERENCE, LARGE_LOOP_PREFETCH, NO_PREFETCH,
LARGE_LOOP_PREFETCH_REPEAT1, LARGE_LOOP_PREFETCH_REPEAT2;
Register result = r0, str1 = r1, cnt1 = r2, str2 = r3, cnt2 = r4,
tmp1 = r10, tmp2 = r11, tmp3 = r12, tmp4 = r14;
FloatRegister vtmpZ = v0, vtmp = v1, vtmp3 = v2;
RegSet spilled_regs = RegSet::of(tmp3, tmp4);
- int prefetchLoopExitCondition = MAX(32, SoftwarePrefetchHintDistance/2);
+ int prefetchLoopExitCondition = MAX(64, SoftwarePrefetchHintDistance/2);
__ eor(vtmpZ, __ T16B, vtmpZ, vtmpZ);
// cnt2 == amount of characters left to compare
@@ -4069,7 +4069,7 @@
if (SoftwarePrefetchHintDistance >= 0) {
__ subs(rscratch2, cnt2, prefetchLoopExitCondition);
- __ br(__ LT, SMALL_LOOP);
+ __ br(__ LT, NO_PREFETCH);
__ bind(LARGE_LOOP_PREFETCH);
__ prfm(Address(tmp2, SoftwarePrefetchHintDistance));
__ mov(tmp4, 2);
@@ -4089,51 +4089,20 @@
__ br(__ GE, LARGE_LOOP_PREFETCH);
}
__ cbz(cnt2, LOAD_LAST); // no characters left except last load
+ __ bind(NO_PREFETCH);
__ subs(cnt2, cnt2, 16);
__ br(__ LT, TAIL);
- __ b(SMALL_LOOP_ENTER);
__ bind(SMALL_LOOP); // smaller loop
__ subs(cnt2, cnt2, 16);
- __ bind(SMALL_LOOP_ENTER);
compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
__ br(__ GE, SMALL_LOOP);
- __ cbz(cnt2, LOAD_LAST);
- __ bind(TAIL); // 1..15 characters left
- __ subs(zr, cnt2, -8);
- __ br(__ GT, TAIL_LOAD_16);
- __ ldrd(vtmp, Address(tmp2));
- __ zip1(vtmp3, __ T8B, vtmp, vtmpZ);
-
- __ ldr(tmpU, Address(__ post(cnt1, 8)));
- __ fmovd(tmpL, vtmp3);
- __ eor(rscratch2, tmp3, tmpL);
- __ cbnz(rscratch2, DIFF2);
- __ umov(tmpL, vtmp3, __ D, 1);
- __ eor(rscratch2, tmpU, tmpL);
- __ cbnz(rscratch2, DIFF1);
- __ b(LOAD_LAST);
- __ bind(TAIL_LOAD_16);
- __ ldrq(vtmp, Address(tmp2));
- __ ldr(tmpU, Address(__ post(cnt1, 8)));
- __ zip1(vtmp3, __ T16B, vtmp, vtmpZ);
- __ zip2(vtmp, __ T16B, vtmp, vtmpZ);
- __ fmovd(tmpL, vtmp3);
- __ eor(rscratch2, tmp3, tmpL);
- __ cbnz(rscratch2, DIFF2);
-
- __ ldr(tmp3, Address(__ post(cnt1, 8)));
- __ umov(tmpL, vtmp3, __ D, 1);
- __ eor(rscratch2, tmpU, tmpL);
- __ cbnz(rscratch2, DIFF1);
-
- __ ldr(tmpU, Address(__ post(cnt1, 8)));
- __ fmovd(tmpL, vtmp);
- __ eor(rscratch2, tmp3, tmpL);
- __ cbnz(rscratch2, DIFF2);
-
- __ umov(tmpL, vtmp, __ D, 1);
- __ eor(rscratch2, tmpU, tmpL);
- __ cbnz(rscratch2, DIFF1);
+ __ cmn(cnt2, (u1)16);
+ __ br(__ EQ, LOAD_LAST);
+ __ bind(TAIL); // 1..15 characters left until last load (last 4 characters)
+ __ add(cnt1, cnt1, cnt2, __ LSL, 1); // Address of 8 bytes before last 4 characters in UTF-16 string
+ __ add(tmp2, tmp2, cnt2); // Address of 16 bytes before last 4 characters in Latin1 string
+ __ ldr(tmp3, Address(cnt1, -8));
+ compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2); // last 16 characters before last load
__ b(LOAD_LAST);
__ bind(DIFF2);
__ mov(tmpU, tmp3);
@@ -4141,10 +4110,12 @@
__ pop(spilled_regs, sp);
__ b(CALCULATE_DIFFERENCE);
__ bind(LOAD_LAST);
+ // Last 4 UTF-16 characters are already pre-loaded into tmp3 by compare_string_16_x_LU.
+ // No need to load it again
+ __ mov(tmpU, tmp3);
__ pop(spilled_regs, sp);
__ ldrs(vtmp, Address(strL));
- __ ldr(tmpU, Address(strU));
__ zip1(vtmp, __ T8B, vtmp, vtmpZ);
__ fmovd(tmpL, vtmp);
@@ -4206,10 +4177,10 @@
compare_string_16_bytes_same(DIFF, DIFF2);
__ br(__ GT, LARGE_LOOP_PREFETCH);
__ cbz(cnt2, LAST_CHECK_AND_LENGTH_DIFF); // no more chars left?
- // less than 16 bytes left?
- __ subs(cnt2, cnt2, isLL ? 16 : 8);
- __ br(__ LT, TAIL);
}
+ // less than 16 bytes left?
+ __ subs(cnt2, cnt2, isLL ? 16 : 8);
+ __ br(__ LT, TAIL);
__ bind(SMALL_LOOP);
compare_string_16_bytes_same(DIFF, DIFF2);
__ subs(cnt2, cnt2, isLL ? 16 : 8);