src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
changeset 53114 b0686d0be73f
parent 53089 147e2d96748d
child 53783 72709e703abd
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp	Fri Dec 21 11:02:36 2018 +0100
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp	Fri Dec 21 17:14:46 2018 +0000
@@ -5648,12 +5648,12 @@
           orr(v5, T16B, Vtmp3, Vtmp4);
           uzp1(Vtmp1, T16B, Vtmp1, Vtmp2);
           uzp1(Vtmp3, T16B, Vtmp3, Vtmp4);
-          stpq(Vtmp1, Vtmp3, dst);
           uzp2(v5, T16B, v4, v5); // high bytes
           umov(tmp2, v5, D, 1);
           fmovd(tmp1, v5);
           orr(tmp1, tmp1, tmp2);
           cbnz(tmp1, LOOP_8);
+          stpq(Vtmp1, Vtmp3, dst);
           sub(len, len, 32);
           add(dst, dst, 32);
           add(src, src, 64);
@@ -5671,7 +5671,6 @@
       prfm(Address(src, SoftwarePrefetchHintDistance));
       uzp1(v4, T16B, Vtmp1, Vtmp2);
       uzp1(v5, T16B, Vtmp3, Vtmp4);
-      stpq(v4, v5, dst);
       orr(Vtmp1, T16B, Vtmp1, Vtmp2);
       orr(Vtmp3, T16B, Vtmp3, Vtmp4);
       uzp2(Vtmp1, T16B, Vtmp1, Vtmp3); // high bytes
@@ -5679,6 +5678,7 @@
       fmovd(tmp1, Vtmp1);
       orr(tmp1, tmp1, tmp2);
       cbnz(tmp1, LOOP_8);
+      stpq(v4, v5, dst);
       sub(len, len, 32);
       add(dst, dst, 32);
       add(src, src, 64);
@@ -5693,9 +5693,9 @@
       ld1(Vtmp1, T8H, src);
       uzp1(Vtmp2, T16B, Vtmp1, Vtmp1); // low bytes
       uzp2(Vtmp3, T16B, Vtmp1, Vtmp1); // high bytes
-      strd(Vtmp2, dst);
       fmovd(tmp1, Vtmp3);
       cbnz(tmp1, NEXT_1);
+      strd(Vtmp2, dst);
 
       sub(len, len, 8);
       add(dst, dst, 8);
@@ -5708,9 +5708,9 @@
     cbz(len, DONE);
     BIND(NEXT_1);
       ldrh(tmp1, Address(post(src, 2)));
-      strb(tmp1, Address(post(dst, 1)));
       tst(tmp1, 0xff00);
       br(NE, SET_RESULT);
+      strb(tmp1, Address(post(dst, 1)));
       subs(len, len, 1);
       br(GT, NEXT_1);