// NOTE(review): this text is a two-column diff listing, not compilable source.
// Every line carries a listing line number and a trailing '|', and two
// revisions of encode_iso_array are interleaved line by line: one numbered
// 5406-5470 (narrows with uqxtn and tests the FPSR), the other numbered
// 5406-5508 (narrows with uzp1/uzp2 and adds prfm prefetches). Read either
// revision by following its own number sequence.
//
// In both revisions the function emits code that reads 16-bit chars from src
// and writes them as bytes to dst, staged as 32 chars, then 8 chars, then one
// char per iteration. The scalar tail leaves its loop at the first char whose
// high byte is non-zero (tst against 0xff00). result is loaded with the
// initial len and, at SET_RESULT/DONE, reduced by the remaining len.
5406 void MacroAssembler::encode_iso_array(Register src, Register dst, |
5406 void MacroAssembler::encode_iso_array(Register src, Register dst, |
5407 Register len, Register result, |
5407 Register len, Register result, |
5408 FloatRegister Vtmp1, FloatRegister Vtmp2, |
5408 FloatRegister Vtmp1, FloatRegister Vtmp2, |
5409 FloatRegister Vtmp3, FloatRegister Vtmp4) |
5409 FloatRegister Vtmp3, FloatRegister Vtmp4) |
5410 { |
5410 { |
5411 Label DONE, NEXT_32, LOOP_8, NEXT_8, LOOP_1, NEXT_1; |
5412 Label DONE, SET_RESULT, NEXT_32, NEXT_32_PRFM, LOOP_8, NEXT_8, LOOP_1, NEXT_1, |
5412 Register tmp1 = rscratch1; |
5412 NEXT_32_START, NEXT_32_PRFM_START; |
|
5413 Register tmp1 = rscratch1, tmp2 = rscratch2; |
5413 |
5414 |
5414 mov(result, len); // Save initial len |
5415 mov(result, len); // Save initial len |
5415 |
5416 |
// The vector stages below are emitted only when BUILTIN_SIM is not defined;
// the matching #endif sits just before the scalar NEXT_1 loop.
5416 #ifndef BUILTIN_SIM |
5417 #ifndef BUILTIN_SIM |
// 32-chars-per-iteration stage: each ld1 of four T8H vectors reads 64 bytes
// from src, 32 bytes are written to dst, and src/dst advance by 64/32.
// A char that does not fit in a byte sends control on to LOOP_8.
5417 subs(len, len, 32); |
5418 cmp(len, 8); // handle shortest strings first |
5418 br(LT, LOOP_8); |
5419 br(LT, LOOP_1); |
5419 |
5420 cmp(len, 32); |
5420 // The following code uses the SIMD 'uqxtn' and 'uqxtn2' instructions |
5421 br(LT, NEXT_8); |
5421 // to convert chars to bytes. These set the 'QC' bit in the FPSR if |
5422 // The following code uses the SIMD 'uzp1' and 'uzp2' instructions |
5422 // any char could not fit in a byte, so clear the FPSR so we can test it. |
5423 // to convert chars to bytes |
5423 clear_fpsr(); |
5424 if (SoftwarePrefetchHintDistance >= 0) { |
5424 |
5425 ld1(Vtmp1, Vtmp2, Vtmp3, Vtmp4, T8H, src); |
5425 BIND(NEXT_32); |
5426 cmp(len, SoftwarePrefetchHintDistance/2 + 16); |
5426 ld1(Vtmp1, Vtmp2, Vtmp3, Vtmp4, T8H, src); |
5427 br(LE, NEXT_32_START); |
5427 uqxtn(Vtmp1, T8B, Vtmp1, T8H); // uqxtn - write bottom half |
5428 b(NEXT_32_PRFM_START); |
5428 uqxtn(Vtmp1, T16B, Vtmp2, T8H); // uqxtn2 - write top half |
5429 BIND(NEXT_32_PRFM); |
5429 uqxtn(Vtmp2, T8B, Vtmp3, T8H); |
5430 ld1(Vtmp1, Vtmp2, Vtmp3, Vtmp4, T8H, src); |
5430 uqxtn(Vtmp2, T16B, Vtmp4, T8H); // uqxtn2 |
5431 BIND(NEXT_32_PRFM_START); |
5431 get_fpsr(tmp1); |
5432 prfm(Address(src, SoftwarePrefetchHintDistance)); |
5432 cbnzw(tmp1, LOOP_8); |
5433 orr(v4, T16B, Vtmp1, Vtmp2); |
5433 st1(Vtmp1, Vtmp2, T16B, post(dst, 32)); |
5434 orr(v5, T16B, Vtmp3, Vtmp4); |
5434 subs(len, len, 32); |
5435 uzp1(Vtmp1, T16B, Vtmp1, Vtmp2); |

5436 uzp1(Vtmp3, T16B, Vtmp3, Vtmp4); |

5437 stpq(Vtmp1, Vtmp3, dst); |

5438 uzp2(v5, T16B, v4, v5); // high bytes |

5439 umov(tmp2, v5, D, 1); |

5440 fmovd(tmp1, v5); |

5441 orr(tmp1, tmp1, tmp2); |

5442 cbnz(tmp1, LOOP_8); |

5443 sub(len, len, 32); |

5444 add(dst, dst, 32); |

5445 add(src, src, 64); |

5446 cmp(len, SoftwarePrefetchHintDistance/2 + 16); |

5447 br(GE, NEXT_32_PRFM); |

5448 cmp(len, 32); |

5449 br(LT, LOOP_8); |

5450 BIND(NEXT_32); |

5451 ld1(Vtmp1, Vtmp2, Vtmp3, Vtmp4, T8H, src); |

5452 BIND(NEXT_32_START); |

5453 } else { |

5454 BIND(NEXT_32); |

5455 ld1(Vtmp1, Vtmp2, Vtmp3, Vtmp4, T8H, src); |

5456 } |

5457 prfm(Address(src, SoftwarePrefetchHintDistance)); |

5458 uzp1(v4, T16B, Vtmp1, Vtmp2); |

5459 uzp1(v5, T16B, Vtmp3, Vtmp4); |

5460 stpq(v4, v5, dst); |

5461 orr(Vtmp1, T16B, Vtmp1, Vtmp2); |

5462 orr(Vtmp3, T16B, Vtmp3, Vtmp4); |

5463 uzp2(Vtmp1, T16B, Vtmp1, Vtmp3); // high bytes |

5464 umov(tmp2, Vtmp1, D, 1); |

5465 fmovd(tmp1, Vtmp1); |

5466 orr(tmp1, tmp1, tmp2); |

5467 cbnz(tmp1, LOOP_8); |

5468 sub(len, len, 32); |

5469 add(dst, dst, 32); |
5435 add(src, src, 64); |
5470 add(src, src, 64); |

5471 cmp(len, 32); |
5436 br(GE, NEXT_32); |
5472 br(GE, NEXT_32); |

5473 cbz(len, DONE); |
5437 |
5474 |
// 8-chars-per-iteration stage: one T8H vector (16 bytes) is loaded from src
// and 8 bytes are written to dst. The uqxtn revision re-biases len by 32-8
// and drops to LOOP_1 when the FPSR reads non-zero; the uzp revision tests
// the high bytes gathered by uzp2 and branches to NEXT_1.
5438 BIND(LOOP_8); |
5475 BIND(LOOP_8); |
5439 adds(len, len, 32-8); |
5476 cmp(len, 8); |
5440 br(LT, LOOP_1); |
5477 br(LT, LOOP_1); |
5441 clear_fpsr(); // QC may be set from loop above, clear again |

5442 BIND(NEXT_8); |
5478 BIND(NEXT_8); |
5443 ld1(Vtmp1, T8H, src); |
5479 ld1(Vtmp1, T8H, src); |
5444 uqxtn(Vtmp1, T8B, Vtmp1, T8H); |
5480 uzp1(Vtmp2, T16B, Vtmp1, Vtmp1); // low bytes |
5445 get_fpsr(tmp1); |
5481 uzp2(Vtmp3, T16B, Vtmp1, Vtmp1); // high bytes |
5446 cbnzw(tmp1, LOOP_1); |
5482 strd(Vtmp2, dst); |
5447 st1(Vtmp1, T8B, post(dst, 8)); |
5483 fmovd(tmp1, Vtmp3); |
5448 subs(len, len, 8); |
5484 cbnz(tmp1, NEXT_1); |

5485 |

5486 sub(len, len, 8); |

5487 add(dst, dst, 8); |
5449 add(src, src, 16); |
5488 add(src, src, 16); |

5489 cmp(len, 8); |
5450 br(GE, NEXT_8); |
5490 br(GE, NEXT_8); |
5451 |
5491 |
// Scalar tail: ldrh reads one char and strb writes its low byte, both with
// post-increment. The uqxtn revision tests 0xff00 before the strb; the uzp
// revision stores first and then tests.
5452 BIND(LOOP_1); |
5492 BIND(LOOP_1); |
5453 adds(len, len, 8); |

5454 br(LE, DONE); |

5455 #else |

5456 cbz(len, DONE); |

5457 #endif |
5493 #endif |

5494 cbz(len, DONE); |
5458 BIND(NEXT_1); |
5495 BIND(NEXT_1); |
5459 ldrh(tmp1, Address(post(src, 2))); |
5496 ldrh(tmp1, Address(post(src, 2))); |

5497 strb(tmp1, Address(post(dst, 1))); |
5460 tst(tmp1, 0xff00); |
5498 tst(tmp1, 0xff00); |
5461 br(NE, DONE); |
5499 br(NE, SET_RESULT); |
5462 strb(tmp1, Address(post(dst, 1))); |

5463 subs(len, len, 1); |
5500 subs(len, len, 1); |
5464 br(GT, NEXT_1); |
5501 br(GT, NEXT_1); |
5465 |
5502 |
// result still holds the initial len saved by the mov at the top. In the
// revision that binds SET_RESULT, DONE is bound after the sub, so a branch
// to DONE skips the subtraction; in the other revision every exit runs it.
5466 BIND(DONE); |
5503 BIND(SET_RESULT); |
5467 sub(result, result, len); // Return index where we stopped |
5504 sub(result, result, len); // Return index where we stopped |
5468 // Return len == 0 if we processed all |
5505 // Return len == 0 if we processed all |
5469 // characters |
5506 // characters |

5507 BIND(DONE); |
5470 } |
5508 } |
5471 |
5509 |
5472 |
5510 |
5473 // Inflate byte[] array to char[]. |
5511 // Inflate byte[] array to char[]. |
5474 void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len, |
5512 void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len, |