src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
changeset 53114 b0686d0be73f
parent 53089 147e2d96748d
child 53783 72709e703abd
equal deleted inserted replaced
53113:a92cd6585f60 53114:b0686d0be73f
  5646           prfm(Address(src, SoftwarePrefetchHintDistance));
  5646           prfm(Address(src, SoftwarePrefetchHintDistance));
  5647           orr(v4, T16B, Vtmp1, Vtmp2);
  5647           orr(v4, T16B, Vtmp1, Vtmp2);
  5648           orr(v5, T16B, Vtmp3, Vtmp4);
  5648           orr(v5, T16B, Vtmp3, Vtmp4);
  5649           uzp1(Vtmp1, T16B, Vtmp1, Vtmp2);
  5649           uzp1(Vtmp1, T16B, Vtmp1, Vtmp2);
  5650           uzp1(Vtmp3, T16B, Vtmp3, Vtmp4);
  5650           uzp1(Vtmp3, T16B, Vtmp3, Vtmp4);
  5651           stpq(Vtmp1, Vtmp3, dst);
       
  5652           uzp2(v5, T16B, v4, v5); // high bytes
  5651           uzp2(v5, T16B, v4, v5); // high bytes
  5653           umov(tmp2, v5, D, 1);
  5652           umov(tmp2, v5, D, 1);
  5654           fmovd(tmp1, v5);
  5653           fmovd(tmp1, v5);
  5655           orr(tmp1, tmp1, tmp2);
  5654           orr(tmp1, tmp1, tmp2);
  5656           cbnz(tmp1, LOOP_8);
  5655           cbnz(tmp1, LOOP_8);
       
  5656           stpq(Vtmp1, Vtmp3, dst);
  5657           sub(len, len, 32);
  5657           sub(len, len, 32);
  5658           add(dst, dst, 32);
  5658           add(dst, dst, 32);
  5659           add(src, src, 64);
  5659           add(src, src, 64);
  5660           subs(tmp2, len, SoftwarePrefetchHintDistance/2 + 16);
  5660           subs(tmp2, len, SoftwarePrefetchHintDistance/2 + 16);
  5661           br(GE, NEXT_32_PRFM);
  5661           br(GE, NEXT_32_PRFM);
  5669           ld1(Vtmp1, Vtmp2, Vtmp3, Vtmp4, T8H, src);
  5669           ld1(Vtmp1, Vtmp2, Vtmp3, Vtmp4, T8H, src);
  5670       }
  5670       }
  5671       prfm(Address(src, SoftwarePrefetchHintDistance));
  5671       prfm(Address(src, SoftwarePrefetchHintDistance));
  5672       uzp1(v4, T16B, Vtmp1, Vtmp2);
  5672       uzp1(v4, T16B, Vtmp1, Vtmp2);
  5673       uzp1(v5, T16B, Vtmp3, Vtmp4);
  5673       uzp1(v5, T16B, Vtmp3, Vtmp4);
  5674       stpq(v4, v5, dst);
       
  5675       orr(Vtmp1, T16B, Vtmp1, Vtmp2);
  5674       orr(Vtmp1, T16B, Vtmp1, Vtmp2);
  5676       orr(Vtmp3, T16B, Vtmp3, Vtmp4);
  5675       orr(Vtmp3, T16B, Vtmp3, Vtmp4);
  5677       uzp2(Vtmp1, T16B, Vtmp1, Vtmp3); // high bytes
  5676       uzp2(Vtmp1, T16B, Vtmp1, Vtmp3); // high bytes
  5678       umov(tmp2, Vtmp1, D, 1);
  5677       umov(tmp2, Vtmp1, D, 1);
  5679       fmovd(tmp1, Vtmp1);
  5678       fmovd(tmp1, Vtmp1);
  5680       orr(tmp1, tmp1, tmp2);
  5679       orr(tmp1, tmp1, tmp2);
  5681       cbnz(tmp1, LOOP_8);
  5680       cbnz(tmp1, LOOP_8);
       
  5681       stpq(v4, v5, dst);
  5682       sub(len, len, 32);
  5682       sub(len, len, 32);
  5683       add(dst, dst, 32);
  5683       add(dst, dst, 32);
  5684       add(src, src, 64);
  5684       add(src, src, 64);
  5685       cmp(len, (u1)32);
  5685       cmp(len, (u1)32);
  5686       br(GE, NEXT_32);
  5686       br(GE, NEXT_32);
  5691       br(LT, LOOP_1);
  5691       br(LT, LOOP_1);
  5692     BIND(NEXT_8);
  5692     BIND(NEXT_8);
  5693       ld1(Vtmp1, T8H, src);
  5693       ld1(Vtmp1, T8H, src);
  5694       uzp1(Vtmp2, T16B, Vtmp1, Vtmp1); // low bytes
  5694       uzp1(Vtmp2, T16B, Vtmp1, Vtmp1); // low bytes
  5695       uzp2(Vtmp3, T16B, Vtmp1, Vtmp1); // high bytes
  5695       uzp2(Vtmp3, T16B, Vtmp1, Vtmp1); // high bytes
  5696       strd(Vtmp2, dst);
       
  5697       fmovd(tmp1, Vtmp3);
  5696       fmovd(tmp1, Vtmp3);
  5698       cbnz(tmp1, NEXT_1);
  5697       cbnz(tmp1, NEXT_1);
       
  5698       strd(Vtmp2, dst);
  5699 
  5699 
  5700       sub(len, len, 8);
  5700       sub(len, len, 8);
  5701       add(dst, dst, 8);
  5701       add(dst, dst, 8);
  5702       add(src, src, 16);
  5702       add(src, src, 16);
  5703       cmp(len, (u1)8);
  5703       cmp(len, (u1)8);
  5706     BIND(LOOP_1);
  5706     BIND(LOOP_1);
  5707 #endif
  5707 #endif
  5708     cbz(len, DONE);
  5708     cbz(len, DONE);
  5709     BIND(NEXT_1);
  5709     BIND(NEXT_1);
  5710       ldrh(tmp1, Address(post(src, 2)));
  5710       ldrh(tmp1, Address(post(src, 2)));
  5711       strb(tmp1, Address(post(dst, 1)));
       
  5712       tst(tmp1, 0xff00);
  5711       tst(tmp1, 0xff00);
  5713       br(NE, SET_RESULT);
  5712       br(NE, SET_RESULT);
       
  5713       strb(tmp1, Address(post(dst, 1)));
  5714       subs(len, len, 1);
  5714       subs(len, len, 1);
  5715       br(GT, NEXT_1);
  5715       br(GT, NEXT_1);
  5716 
  5716 
  5717     BIND(SET_RESULT);
  5717     BIND(SET_RESULT);
  5718       sub(result, result, len); // Return index where we stopped
  5718       sub(result, result, len); // Return index where we stopped