src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
changeset 50723 671b02f0e450
parent 50716 77fdd64c6334
child 50728 9375184cec98
equal deleted inserted replaced
50722:bc104aaf24e9 50723:671b02f0e450
  5406 void MacroAssembler::encode_iso_array(Register src, Register dst,
  5406 void MacroAssembler::encode_iso_array(Register src, Register dst,
  5407                       Register len, Register result,
  5407                       Register len, Register result,
  5408                       FloatRegister Vtmp1, FloatRegister Vtmp2,
  5408                       FloatRegister Vtmp1, FloatRegister Vtmp2,
  5409                       FloatRegister Vtmp3, FloatRegister Vtmp4)
  5409                       FloatRegister Vtmp3, FloatRegister Vtmp4)
  5410 {
  5410 {
  5411     Label DONE, NEXT_32, LOOP_8, NEXT_8, LOOP_1, NEXT_1;
  5411     Label DONE, SET_RESULT, NEXT_32, NEXT_32_PRFM, LOOP_8, NEXT_8, LOOP_1, NEXT_1,
  5412     Register tmp1 = rscratch1;
  5412         NEXT_32_START, NEXT_32_PRFM_START;
       
  5413     Register tmp1 = rscratch1, tmp2 = rscratch2;
  5413 
  5414 
  5414       mov(result, len); // Save initial len
  5415       mov(result, len); // Save initial len
  5415 
  5416 
  5416 #ifndef BUILTIN_SIM
  5417 #ifndef BUILTIN_SIM
  5417       subs(len, len, 32);
  5418       cmp(len, 8); // handle shortest strings first
  5418       br(LT, LOOP_8);
  5419       br(LT, LOOP_1);
  5419 
  5420       cmp(len, 32);
  5420 // The following code uses the SIMD 'uqxtn' and 'uqxtn2' instructions
  5421       br(LT, NEXT_8);
  5421 // to convert chars to bytes. These set the 'QC' bit in the FPSR if
  5422       // The following code uses the SIMD 'uzp1' and 'uzp2' instructions
  5422 // any char could not fit in a byte, so clear the FPSR so we can test it.
  5423       // to convert chars to bytes
  5423       clear_fpsr();
  5424       if (SoftwarePrefetchHintDistance >= 0) {
  5424 
  5425         ld1(Vtmp1, Vtmp2, Vtmp3, Vtmp4, T8H, src);
  5425     BIND(NEXT_32);
  5426         cmp(len, SoftwarePrefetchHintDistance/2 + 16);
  5426       ld1(Vtmp1, Vtmp2, Vtmp3, Vtmp4, T8H, src);
  5427         br(LE, NEXT_32_START);
  5427       uqxtn(Vtmp1, T8B, Vtmp1, T8H);  // uqxtn  - write bottom half
  5428         b(NEXT_32_PRFM_START);
  5428       uqxtn(Vtmp1, T16B, Vtmp2, T8H); // uqxtn2 - write top half
  5429         BIND(NEXT_32_PRFM);
  5429       uqxtn(Vtmp2, T8B, Vtmp3, T8H);
  5430           ld1(Vtmp1, Vtmp2, Vtmp3, Vtmp4, T8H, src);
  5430       uqxtn(Vtmp2, T16B, Vtmp4, T8H); // uqxtn2
  5431         BIND(NEXT_32_PRFM_START);
  5431       get_fpsr(tmp1);
  5432           prfm(Address(src, SoftwarePrefetchHintDistance));
  5432       cbnzw(tmp1, LOOP_8);
  5433           orr(v4, T16B, Vtmp1, Vtmp2);
  5433       st1(Vtmp1, Vtmp2, T16B, post(dst, 32));
  5434           orr(v5, T16B, Vtmp3, Vtmp4);
  5434       subs(len, len, 32);
  5435           uzp1(Vtmp1, T16B, Vtmp1, Vtmp2);
       
  5436           uzp1(Vtmp3, T16B, Vtmp3, Vtmp4);
       
  5437           stpq(Vtmp1, Vtmp3, dst);
       
  5438           uzp2(v5, T16B, v4, v5); // high bytes
       
  5439           umov(tmp2, v5, D, 1);
       
  5440           fmovd(tmp1, v5);
       
  5441           orr(tmp1, tmp1, tmp2);
       
  5442           cbnz(tmp1, LOOP_8);
       
  5443           sub(len, len, 32);
       
  5444           add(dst, dst, 32);
       
  5445           add(src, src, 64);
       
  5446           cmp(len, SoftwarePrefetchHintDistance/2 + 16);
       
  5447           br(GE, NEXT_32_PRFM);
       
  5448           cmp(len, 32);
       
  5449           br(LT, LOOP_8);
       
  5450         BIND(NEXT_32);
       
  5451           ld1(Vtmp1, Vtmp2, Vtmp3, Vtmp4, T8H, src);
       
  5452         BIND(NEXT_32_START);
       
  5453       } else {
       
  5454         BIND(NEXT_32);
       
  5455           ld1(Vtmp1, Vtmp2, Vtmp3, Vtmp4, T8H, src);
       
  5456       }
       
  5457       prfm(Address(src, SoftwarePrefetchHintDistance));
       
  5458       uzp1(v4, T16B, Vtmp1, Vtmp2);
       
  5459       uzp1(v5, T16B, Vtmp3, Vtmp4);
       
  5460       stpq(v4, v5, dst);
       
  5461       orr(Vtmp1, T16B, Vtmp1, Vtmp2);
       
  5462       orr(Vtmp3, T16B, Vtmp3, Vtmp4);
       
  5463       uzp2(Vtmp1, T16B, Vtmp1, Vtmp3); // high bytes
       
  5464       umov(tmp2, Vtmp1, D, 1);
       
  5465       fmovd(tmp1, Vtmp1);
       
  5466       orr(tmp1, tmp1, tmp2);
       
  5467       cbnz(tmp1, LOOP_8);
       
  5468       sub(len, len, 32);
       
  5469       add(dst, dst, 32);
  5435       add(src, src, 64);
  5470       add(src, src, 64);
       
  5471       cmp(len, 32);
  5436       br(GE, NEXT_32);
  5472       br(GE, NEXT_32);
       
  5473       cbz(len, DONE);
  5437 
  5474 
  5438     BIND(LOOP_8);
  5475     BIND(LOOP_8);
  5439       adds(len, len, 32-8);
  5476       cmp(len, 8);
  5440       br(LT, LOOP_1);
  5477       br(LT, LOOP_1);
  5441       clear_fpsr(); // QC may be set from loop above, clear again
       
  5442     BIND(NEXT_8);
  5478     BIND(NEXT_8);
  5443       ld1(Vtmp1, T8H, src);
  5479       ld1(Vtmp1, T8H, src);
  5444       uqxtn(Vtmp1, T8B, Vtmp1, T8H);
  5480       uzp1(Vtmp2, T16B, Vtmp1, Vtmp1); // low bytes
  5445       get_fpsr(tmp1);
  5481       uzp2(Vtmp3, T16B, Vtmp1, Vtmp1); // high bytes
  5446       cbnzw(tmp1, LOOP_1);
  5482       strd(Vtmp2, dst);
  5447       st1(Vtmp1, T8B, post(dst, 8));
  5483       fmovd(tmp1, Vtmp3);
  5448       subs(len, len, 8);
  5484       cbnz(tmp1, NEXT_1);
       
  5485 
       
  5486       sub(len, len, 8);
       
  5487       add(dst, dst, 8);
  5449       add(src, src, 16);
  5488       add(src, src, 16);
       
  5489       cmp(len, 8);
  5450       br(GE, NEXT_8);
  5490       br(GE, NEXT_8);
  5451 
  5491 
  5452     BIND(LOOP_1);
  5492     BIND(LOOP_1);
  5453       adds(len, len, 8);
       
  5454       br(LE, DONE);
       
  5455 #else
       
  5456       cbz(len, DONE);
       
  5457 #endif
  5493 #endif
       
  5494     cbz(len, DONE);
  5458     BIND(NEXT_1);
  5495     BIND(NEXT_1);
  5459       ldrh(tmp1, Address(post(src, 2)));
  5496       ldrh(tmp1, Address(post(src, 2)));
       
  5497       strb(tmp1, Address(post(dst, 1)));
  5460       tst(tmp1, 0xff00);
  5498       tst(tmp1, 0xff00);
  5461       br(NE, DONE);
  5499       br(NE, SET_RESULT);
  5462       strb(tmp1, Address(post(dst, 1)));
       
  5463       subs(len, len, 1);
  5500       subs(len, len, 1);
  5464       br(GT, NEXT_1);
  5501       br(GT, NEXT_1);
  5465 
  5502 
  5466     BIND(DONE);
  5503     BIND(SET_RESULT);
  5467       sub(result, result, len); // Return index where we stopped
  5504       sub(result, result, len); // Return index where we stopped
  5468                                 // Return len == 0 if we processed all
  5505                                 // Return len == 0 if we processed all
  5469                                 // characters
  5506                                 // characters
       
  5507     BIND(DONE);
  5470 }
  5508 }
  5471 
  5509 
  5472 
  5510 
  5473 // Inflate byte[] array to char[].
  5511 // Inflate byte[] array to char[].
  5474 void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
  5512 void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,