4251 load_ptr_contents(base, G6_heapbase); |
4254 load_ptr_contents(base, G6_heapbase); |
4252 } |
4255 } |
4253 } |
4256 } |
4254 } |
4257 } |
4255 |
4258 |
4256 // Compare char[] arrays aligned to 4 bytes. |
4259 #ifdef COMPILER2 |
4257 void MacroAssembler::char_arrays_equals(Register ary1, Register ary2, |
4260 |
4258 Register limit, Register result, |
4261 // Compress char[] to byte[] by compressing 16 bytes at once. Return 0 on failure. |
//
// Emits the VIS3 fast path of the compress intrinsic. While cnt >= 8 and both
// src and dst are 8-byte aligned, each iteration loads 8 chars (16 bytes) from
// src, tests them against the mask 0xff00ff00ff00ff00 and stores 8 packed
// bytes to dst.
//
//   src, dst     - current read / write addresses (advanced by 16 / 8 per iteration)
//   cnt          - number of chars left (decremented by 8 per iteration)
//   result       - receives a copy of cnt up front; overwritten with 0 when a
//                  char with a non-zero high byte is found
//   tmp1..tmp4   - integer scratch registers (tmp4 holds the masks)
//   ftmp1..ftmp3 - floating-point scratch registers used by bshuffle
//   Ldone        - failure exit; it is not bound in this function
//
// Control falls out through Lslow (bound at the very end) either immediately,
// when the fast path does not apply, or after the loop with cnt < 8 chars
// remaining (possibly 0).
//
// NOTE(review): this text is a side-by-side diff rendering. The lines carrying
// the numbers 4259-4264 that are interleaved below appear to belong to the
// other revision's char_arrays_equals() and not to string_compress_16().
4259 Register chr1, Register chr2, Label& Ldone) { |
4262 void MacroAssembler::string_compress_16(Register src, Register dst, Register cnt, Register result, |
4260 Label Lvector, Lloop; |
4263 Register tmp1, Register tmp2, Register tmp3, Register tmp4, |
4261 assert(chr1 == result, "should be the same"); |
4264 FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, Label& Ldone) { |
4262 |
4265 Label Lloop, Lslow; |
4263 // Note: limit contains number of bytes (2*char_elements) != 0. |
4266 assert(UseVIS >= 3, "VIS3 is required"); |
4264 andcc(limit, 0x2, chr1); // trailing character ? |
4267 assert_different_registers(src, dst, cnt, tmp1, tmp2, tmp3, tmp4, result); |

4268 assert_different_registers(ftmp1, ftmp2, ftmp3); |

4269 |

4270 // Check if cnt >= 8 (= 16 bytes) |

4271 cmp(cnt, 8); |

4272 br(Assembler::less, false, Assembler::pn, Lslow); |

// The branch is not annulled, so the copy below is emitted into a delay slot
// that runs whether or not the branch is taken.
4273 delayed()->mov(cnt, result); // copy count |

4274 |

4275 // Check for 8-byte alignment of src and dst |

4276 or3(src, dst, tmp1); |

4277 andcc(tmp1, 7, G0); |

4278 br(Assembler::notZero, false, Assembler::pn, Lslow); |

4279 delayed()->nop(); |

4280 |

4281 // Set mask for bshuffle instruction |

// The selector nibbles 1,3,5,...,f name every odd byte of the 16-byte
// ftmp1:ftmp2 pair (presumably the low byte of each char - TODO confirm
// against the VIS bshuffle definition).
4282 Register mask = tmp4; |

4283 set(0x13579bdf, mask); |

4284 bmask(mask, G0, G0); |

4285 |

4286 // Set mask to 0xff00 ff00 ff00 ff00 to check for non-latin1 characters |

// tmp4 is reused here: the bshuffle selector has already been passed to bmask.
4287 Assembler::sethi(0xff00fc00, mask); // mask = 0x0000 0000 ff00 fc00 |

4288 add(mask, 0x300, mask); // mask = 0x0000 0000 ff00 ff00 |

4289 sllx(mask, 32, tmp1); // tmp1 = 0xff00 ff00 0000 0000 |

4290 or3(mask, tmp1, mask); // mask = 0xff00 ff00 ff00 ff00 |

4291 |

4292 // Load first 8 bytes |

4293 ldx(src, 0, tmp1); |

4294 |

4295 bind(Lloop); |

4296 // Load next 8 bytes |

4297 ldx(src, 8, tmp2); |

4298 |

4299 // Check for non-latin1 character by testing if the most significant byte of a char is set. |

4300 // Although we have to move the data between integer and floating point registers, this is |

4301 // still faster than the corresponding VIS instructions (ford/fand/fcmpd). |

4302 or3(tmp1, tmp2, tmp3); |

4303 btst(tmp3, mask); |

4304 // annul zeroing if branch is not taken to preserve original count |

4305 brx(Assembler::notZero, true, Assembler::pn, Ldone); |

4306 delayed()->mov(G0, result); // 0 - failed |

4307 |

4308 // Move bytes into float register |

4309 movxtod(tmp1, ftmp1); |

4310 movxtod(tmp2, ftmp2); |

4311 |

4312 // Compress by copying one byte per char from ftmp1 and ftmp2 to ftmp3 |

4313 bshuffle(ftmp1, ftmp2, ftmp3); |

4314 stf(FloatRegisterImpl::D, ftmp3, dst, 0); |

4315 |

4316 // Increment addresses and decrement count |

4317 inc(src, 16); |

4318 inc(dst, 8); |

4319 dec(cnt, 8); |

4320 |

4321 cmp(cnt, 8); |

4322 // annul LDX if branch is not taken to prevent access past end of string |

4323 br(Assembler::greaterEqual, true, Assembler::pt, Lloop); |

// Reloads tmp1 for the next iteration (same load as the one before Lloop).
4324 delayed()->ldx(src, 0, tmp1); |

4325 |

4326 // Fallback to slow version |

// Nothing is emitted after this label: whatever the caller emits next handles
// the remaining cnt chars.
4327 bind(Lslow); |

4328 } |
|
4329 |
|
4330 // Compress char[] to byte[]. Return 0 on failure. |
//
// Emits a one-char-at-a-time loop: each iteration takes the 16-bit value
// loaded from src, branches to Ldone with result = 0 if it is greater than
// 0xff, and otherwise stores it as a single byte to dst. src advances by
// sizeof(jchar), dst by one byte, and cnt is decremented until it reaches 0.
//
//   src, dst - current read / write addresses (both are advanced)
//   cnt      - number of chars to process (counted down to 0)
//   result   - only written on failure (set to 0). NOTE(review): the success
//              value is presumably pre-loaded by the caller - confirm.
//   tmp      - scratch register holding the char being processed
//   Ldone    - failure exit; not bound here, the success path falls through
//
// The first char is loaded and stored before cnt is tested, so the emitted
// code expects cnt != 0 on entry (NOTE(review): confirm callers guarantee it).

4331 void MacroAssembler::string_compress(Register src, Register dst, Register cnt, Register result, Register tmp, Label& Ldone) { |

4332 Label Lloop; |

4333 assert_different_registers(src, dst, cnt, tmp, result); |

4334 |

// Prime the loop with the first char; later chars are loaded in the delay
// slot of the loop branch.
4335 lduh(src, 0, tmp); |

4336 |

4337 bind(Lloop); |

4338 inc(src, sizeof(jchar)); |

4339 cmp(tmp, 0xff); |

4340 // annul zeroing if branch is not taken to preserve original count |

4341 br(Assembler::greater, true, Assembler::pn, Ldone); // don't check xcc |

4342 delayed()->mov(G0, result); // 0 - failed |

// deccc produces the condition codes tested by the loop branch below; the
// store and the pointer increment in between have to leave them untouched.
4343 deccc(cnt); |

4344 stb(tmp, dst, 0); |

4345 inc(dst); |

4346 // annul LDUH if branch is not taken to prevent access past end of string |

4347 br(Assembler::notZero, true, Assembler::pt, Lloop); |

4348 delayed()->lduh(src, 0, tmp); // hoisted |

4349 } |
|
4350 |
|
4351 // Inflate byte[] to char[] by inflating 16 bytes at once. |
//
// Emits the VIS3 fast path of the inflate intrinsic. While cnt >= 8 and both
// src and dst are 8-byte aligned, each iteration loads 8 bytes from src and
// stores 16 bytes to dst, produced by two fpmerge operations against a zeroed
// register.
//
//   src, dst     - current read / write addresses (advanced by 8 / 16 per iteration)
//   cnt          - number of source bytes left (decremented by 8 per iteration)
//   tmp          - integer scratch register for the alignment test
//   ftmp1..ftmp3 - floating-point scratch registers
//   ftmp4        - zeroed and used as the constant-zero fpmerge operand
//   Ldone        - not referenced in this function
//
// Control falls out through Lslow (bound at the very end) either immediately,
// when the fast path does not apply, or after the loop with cnt < 8 remaining.

4352 void MacroAssembler::string_inflate_16(Register src, Register dst, Register cnt, Register tmp, |

4353 FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, FloatRegister ftmp4, Label& Ldone) { |

4354 Label Lloop, Lslow; |

4355 assert(UseVIS >= 3, "VIS3 is required"); |

4356 assert_different_registers(src, dst, cnt, tmp); |

4357 assert_different_registers(ftmp1, ftmp2, ftmp3, ftmp4); |

4358 |

4359 // Check if cnt >= 8 (8 source bytes = 16 bytes of output) |

4360 cmp(cnt, 8); |

4361 br(Assembler::less, false, Assembler::pn, Lslow); |

4362 delayed()->nop(); |

4363 |

4364 // Check for 8-byte alignment of src and dst |

4365 or3(src, dst, tmp); |

4366 andcc(tmp, 7, G0); |

4367 br(Assembler::notZero, false, Assembler::pn, Lslow); |

4368 // Initialize float register to zero |

// The fzero sits in the delay slot of the non-annulled branch above, so it is
// also executed when the branch to Lslow is taken.
4369 FloatRegister zerof = ftmp4; |

4370 delayed()->fzero(FloatRegisterImpl::D, zerof); |

4371 |

4372 // Load first 8 bytes |

4373 ldf(FloatRegisterImpl::D, src, 0, ftmp1); |

4374 |

4375 bind(Lloop); |

4376 inc(src, 8); |

4377 dec(cnt, 8); |

4378 |

4379 // Inflate the string by interleaving each byte from the source array |

4380 // with a zero byte and storing the result in the destination array. |

// ftmp1->successor() is merged into the upper 8 output bytes (dst + 8) and
// ftmp1 into the lower 8 (dst + 0); presumably these are the second and first
// 4-byte halves of the loaded double - TODO confirm.
4381 fpmerge(zerof, ftmp1->successor(), ftmp2); |

4382 stf(FloatRegisterImpl::D, ftmp2, dst, 8); |

4383 fpmerge(zerof, ftmp1, ftmp3); |

4384 stf(FloatRegisterImpl::D, ftmp3, dst, 0); |

4385 |

4386 inc(dst, 16); |

4387 |

4388 cmp(cnt, 8); |

4389 // annul LDF if branch is not taken to prevent access past end of string |

4390 br(Assembler::greaterEqual, true, Assembler::pt, Lloop); |

4391 delayed()->ldf(FloatRegisterImpl::D, src, 0, ftmp1); |

4392 |

4393 // Fallback to slow version |

// Nothing is emitted after this label: whatever the caller emits next handles
// the remaining cnt bytes.
4394 bind(Lslow); |

4395 } |
|
4396 |
|
4397 // Inflate byte[] to char[]. |
//
// Emits a one-byte-at-a-time loop: each iteration stores the zero-extended
// byte loaded from src as a 16-bit value to dst. src advances by one byte,
// dst by sizeof(jchar), and cnt is decremented until it reaches 0.
//
//   src, dst - current read / write addresses (both are advanced)
//   cnt      - number of bytes to process (counted down to 0)
//   tmp      - scratch register holding the byte being processed
//   Ldone    - not referenced in this function
//
// The first byte is loaded and stored before cnt is tested, so the emitted
// code expects cnt != 0 on entry (NOTE(review): confirm callers guarantee it).

4398 void MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp, Label& Ldone) { |

4399 Label Loop; |

4400 assert_different_registers(src, dst, cnt, tmp); |

4401 |

// Prime the loop with the first byte; later bytes are loaded in the delay
// slot of the loop branch.
4402 ldub(src, 0, tmp); |

4403 bind(Loop); |

4404 inc(src); |

// deccc produces the condition codes tested by the loop branch below; the
// store and the pointer increment in between have to leave them untouched.
4405 deccc(cnt); |

4406 sth(tmp, dst, 0); |

4407 inc(dst, sizeof(jchar)); |

4408 // annul LDUB if branch is not taken to prevent access past end of string |

4409 br(Assembler::notZero, true, Assembler::pt, Loop); |

4410 delayed()->ldub(src, 0, tmp); // hoisted |

4411 } |
|
4412 |
|
// Emits code that compares two strings and leaves a compareTo-style value in
// result: the difference between the first pair of characters that differ or,
// if one string is a prefix of the other, the difference of the lengths.
//
//   str1, str2 - addresses of the first characters (modified on the loop path)
//   cnt1, cnt2 - lengths; cnt1 becomes the minimum, cnt2 is reused for chr2.
//                NOTE(review): the halving of cnt2 for LU/UL and of diff for
//                UU suggests these are byte counts - confirm with the callers.
//   tmp1       - holds diff = cnt1 - cnt2 (after cnt2 was adjusted)
//   tmp2       - second limit register, only used for LU/UL
//   result     - output; doubles as chr1 while comparing
//   ae         - StrIntrinsicNode::LL / UU / LU / UL, selects the load widths:
//                bytes for both strings (LL), halfwords for both (UU),
//                otherwise a byte from str1 and a halfword from str2
4413 void MacroAssembler::string_compare(Register str1, Register str2, |

4414 Register cnt1, Register cnt2, |

4415 Register tmp1, Register tmp2, |

4416 Register result, int ae) { |

4417 Label Ldone, Lloop; |

// tmp2 is deliberately left out here; it is only checked below for LU/UL,
// the only cases that use it.
4418 assert_different_registers(str1, str2, cnt1, cnt2, tmp1, result); |

4419 int stride1, stride2; |

4420 |

4421 // Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a) |

4422 // we interchange str1 and str2 in the UL case and negate the result. |

4423 // Like this, str1 is always latin1 encoded, except for the UU case. |

4424 |

4425 if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) { |

4426 srl(cnt2, 1, cnt2); |

4427 } |

4428 |

4429 // See if the lengths are different, and calculate min in cnt1. |

4430 // Save diff in case we need it for a tie-breaker. |

4431 Label Lskip; |

4432 Register diff = tmp1; |

4433 subcc(cnt1, cnt2, diff); |

// Annulled branch: the mov in the delay slot only takes effect when the
// branch is taken, i.e. when cnt1 > cnt2.
4434 br(Assembler::greater, true, Assembler::pt, Lskip); |

4435 // cnt2 is shorter, so use its count: |

4436 delayed()->mov(cnt2, cnt1); |

4437 bind(Lskip); |

4438 |

4439 // Rename registers |

// limit2 is the same register as limit1 unless redirected to tmp2 below.
4440 Register limit1 = cnt1; |

4441 Register limit2 = limit1; |

4442 Register chr1 = result; |

4443 Register chr2 = cnt2; |

4444 if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) { |

4445 // We need an additional register to keep track of two limits |

4446 assert_different_registers(str1, str2, cnt1, cnt2, tmp1, tmp2, result); |

4447 limit2 = tmp2; |

4448 } |

4449 |

4450 // Is the minimum length zero? |

4451 cmp(limit1, (int)0); // use cast to resolve overloading ambiguity |

4452 br(Assembler::equal, true, Assembler::pn, Ldone); |

4453 // result is difference in lengths |

4454 if (ae == StrIntrinsicNode::UU) { |

4455 delayed()->sra(diff, 1, result); // Divide by 2 to get number of chars |

4456 } else { |

4457 delayed()->mov(diff, result); |

4458 } |

4459 |

4460 // Load first characters |

// stride1 / stride2 are the per-character byte sizes of str1 / str2.
4461 if (ae == StrIntrinsicNode::LL) { |

4462 stride1 = stride2 = sizeof(jbyte); |

4463 ldub(str1, 0, chr1); |

4464 ldub(str2, 0, chr2); |

4465 } else if (ae == StrIntrinsicNode::UU) { |

4466 stride1 = stride2 = sizeof(jchar); |

4467 lduh(str1, 0, chr1); |

4468 lduh(str2, 0, chr2); |

4469 } else { |

4470 stride1 = sizeof(jbyte); |

4471 stride2 = sizeof(jchar); |

4472 ldub(str1, 0, chr1); |

4473 lduh(str2, 0, chr2); |

4474 } |

4475 |

4476 // Compare first characters |

// chr1 is result, so on a mismatch the difference is already in place.
4477 subcc(chr1, chr2, chr1); |

4478 br(Assembler::notZero, false, Assembler::pt, Ldone); |

4479 assert(chr1 == result, "result must be pre-placed"); |

4480 delayed()->nop(); |

4481 |

4482 // Check if the strings start at same location |

4483 cmp(str1, str2); |

4484 brx(Assembler::equal, true, Assembler::pn, Ldone); |

4485 delayed()->mov(G0, result); // result is zero |

4486 |

4487 // We have no guarantee that on 64 bit the higher half of limit is 0 |

4488 signx(limit1); |

4489 |

4490 // Get limit |

// chr1 / chr2 temporarily hold (limit - stride), i.e. the amount of data that
// remains after the first character, which was already compared above.
4491 if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) { |

4492 sll(limit1, 1, limit2); |

4493 subcc(limit2, stride2, chr2); |

4494 } |

4495 subcc(limit1, stride1, chr1); |

4496 br(Assembler::zero, true, Assembler::pn, Ldone); |

4497 // result is difference in lengths |

4498 if (ae == StrIntrinsicNode::UU) { |

4499 delayed()->sra(diff, 1, result); // Divide by 2 to get number of chars |

4500 } else { |

4501 delayed()->mov(diff, result); |

4502 } |

4503 |

4504 // Shift str1 and str2 to the end of the arrays, negate limit |

// From here on the limits are negative offsets from the end that count up to 0.
4505 add(str1, limit1, str1); |

4506 add(str2, limit2, str2); |

4507 neg(chr1, limit1); // limit1 = -(limit1-stride1) |

4508 if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) { |

4509 neg(chr2, limit2); // limit2 = -(limit2-stride2) |

4510 } |

4511 |

4512 // Compare the rest of the characters |

4513 if (ae == StrIntrinsicNode::UU) { |

4514 lduh(str1, limit1, chr1); |

4515 } else { |

4516 ldub(str1, limit1, chr1); |

4517 } |

4518 |

4519 bind(Lloop); |

4520 if (ae == StrIntrinsicNode::LL) { |

4521 ldub(str2, limit2, chr2); |

4522 } else { |

4523 lduh(str2, limit2, chr2); |

4524 } |

4525 |

4526 subcc(chr1, chr2, chr1); |

4527 br(Assembler::notZero, false, Assembler::pt, Ldone); |

4528 assert(chr1 == result, "result must be pre-placed"); |

// Not annulled: limit1 is advanced in the delay slot on both paths.
4529 delayed()->inccc(limit1, stride1); |

// For LU/UL the condition codes tested by the loop branch below come from
// this second inccc. limit2 starts at twice limit1 and advances by twice the
// stride, so both reach zero in the same iteration.
4530 if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) { |

4531 inccc(limit2, stride2); |

4532 } |

4533 |

4534 // annul LDUB/LDUH if branch is not taken to prevent access past end of string |

4535 br(Assembler::notZero, true, Assembler::pt, Lloop); |

// In the UU case limit2 is just another name for limit1 (it is only redirected
// to tmp2 for LU/UL), so this reloads from the same index register as the
// load emitted before Lloop.
4536 if (ae == StrIntrinsicNode::UU) { |

4537 delayed()->lduh(str1, limit2, chr1); |

4538 } else { |

4539 delayed()->ldub(str1, limit1, chr1); |

4540 } |

4541 |

4542 // If strings are equal up to min length, return the length difference. |

4543 if (ae == StrIntrinsicNode::UU) { |

4544 // Divide by 2 to get number of chars |

4545 sra(diff, 1, result); |

4546 } else { |

4547 mov(diff, result); |

4548 } |

4549 |

4550 // Otherwise, return the difference between the first mismatched chars. |

// Every exit path, including the early ones above, passes through Ldone, so
// the UL negation below applies to all of them.
4551 bind(Ldone); |

4552 if(ae == StrIntrinsicNode::UL) { |

4553 // Negate result (see note above) |

4554 neg(result); |

4555 } |

4556 } |
|
4557 |
|
// Emits code that compares two arrays and leaves 1 in result if they are
// equal and 0 otherwise.
//
//   is_array_equ - true: ary1 / ary2 are treated as array references. The
//                  emitted code handles identical references (equal) and null
//                  references (not equal), loads both lengths from
//                  arrayOopDesc::length_offset_in_bytes(), reports not equal
//                  if they differ, and then advances both registers by
//                  base_offset.
//                  false: ary1 / ary2 are used as given and limit is supplied
//                  by the caller (it is only sign-extended here).
//   ary1, ary2   - the two arrays (modified)
//   limit        - length; modified. NOTE(review): when is_array_equ is false
//                  and is_byte is false it is used as a byte count without
//                  scaling - presumably the caller passes bytes; confirm.
//   tmp          - scratch register
//   result       - output; also used as a scratch register for loaded data
//   is_byte      - true for byte elements (an odd trailing byte is compared
//                  separately); otherwise T_CHAR is used for base_offset
//
// After the optional trailing byte and trailing 2-byte unit, the remaining
// data is compared 4 bytes at a time with lduw.
//
// NOTE(review): this text is a side-by-side diff rendering. From the
// "check for trailing character" step onwards, the lines numbered 4265-4300
// that use chr1 / chr2 appear to belong to the other revision's
// char_arrays_equals() and not to array_equals().
4558 void MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2, |

4559 Register limit, Register tmp, Register result, bool is_byte) { |

4560 Label Ldone, Lvector, Lloop; |

4561 assert_different_registers(ary1, ary2, limit, tmp, result); |

4562 |

4563 int length_offset = arrayOopDesc::length_offset_in_bytes(); |

4564 int base_offset = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR); |

4565 |

4566 if (is_array_equ) { |

4567 // return true if the same array |

4568 cmp(ary1, ary2); |

4569 brx(Assembler::equal, true, Assembler::pn, Ldone); |

4570 delayed()->add(G0, 1, result); // equal |

4571 |

4572 br_null(ary1, true, Assembler::pn, Ldone); |

4573 delayed()->mov(G0, result); // not equal |

4574 |

4575 br_null(ary2, true, Assembler::pn, Ldone); |

4576 delayed()->mov(G0, result); // not equal |

4577 |

4578 // load the lengths of arrays |

4579 ld(Address(ary1, length_offset), limit); |

4580 ld(Address(ary2, length_offset), tmp); |

4581 |

4582 // return false if the two arrays are not equal length |

4583 cmp(limit, tmp); |

4584 br(Assembler::notEqual, true, Assembler::pn, Ldone); |

4585 delayed()->mov(G0, result); // not equal |

4586 } |

4587 |

4588 cmp_zero_and_br(Assembler::zero, limit, Ldone, true, Assembler::pn); |

4589 delayed()->add(G0, 1, result); // zero-length arrays are equal |

4590 |

4591 if (is_array_equ) { |

4592 // load array addresses |

4593 add(ary1, base_offset, ary1); |

4594 add(ary2, base_offset, ary2); |

4595 } else { |

4596 // We have no guarantee that on 64 bit the higher half of limit is 0 |

4597 signx(limit); |

4598 } |

4599 |

4600 if (is_byte) { |

4601 Label Lskip; |

4602 // check for trailing byte |

4603 andcc(limit, 0x1, tmp); |

4604 br(Assembler::zero, false, Assembler::pt, Lskip); |

4605 delayed()->nop(); |

4606 |

4607 // compare the trailing byte |

// limit is reduced first so that it indexes the last byte and afterwards
// holds the number of bytes still to be compared.
4608 sub(limit, sizeof(jbyte), limit); |

4609 ldub(ary1, limit, result); |

4610 ldub(ary2, limit, tmp); |

4611 cmp(result, tmp); |

4612 br(Assembler::notEqual, true, Assembler::pt, Ldone); |

4613 delayed()->mov(G0, result); // not equal |

4614 |

4615 // only one byte? |

4616 cmp_zero_and_br(zero, limit, Ldone, true, Assembler::pn); |

4617 delayed()->add(G0, 1, result); // zero-length arrays are equal |

4618 bind(Lskip); |

4619 } else if (is_array_equ) { |

4620 // set byte count |

4621 sll(limit, exact_log2(sizeof(jchar)), limit); |

4622 } |

4623 |

4624 // check for trailing character |

4625 andcc(limit, 0x2, tmp); |
4265 br(Assembler::zero, false, Assembler::pt, Lvector); |
4626 br(Assembler::zero, false, Assembler::pt, Lvector); |
4266 delayed()->nop(); |
4627 delayed()->nop(); |
4267 |
4628 |
4268 // compare the trailing char |
4629 // compare the trailing char |
4269 sub(limit, sizeof(jchar), limit); |
4630 sub(limit, sizeof(jchar), limit); |
4270 lduh(ary1, limit, chr1); |
4631 lduh(ary1, limit, result); |
4271 lduh(ary2, limit, chr2); |
4632 lduh(ary2, limit, tmp); |
4272 cmp(chr1, chr2); |
4633 cmp(result, tmp); |
4273 br(Assembler::notEqual, true, Assembler::pt, Ldone); |
4634 br(Assembler::notEqual, true, Assembler::pt, Ldone); |
4274 delayed()->mov(G0, result); // not equal |
4635 delayed()->mov(G0, result); // not equal |
4275 |
4636 |
4276 // only one char ? |
4637 // only one char? |
4277 cmp_zero_and_br(zero, limit, Ldone, true, Assembler::pn); |
4638 cmp_zero_and_br(zero, limit, Ldone, true, Assembler::pn); |
4278 delayed()->add(G0, 1, result); // zero-length arrays are equal |
4639 delayed()->add(G0, 1, result); // zero-length arrays are equal |
4279 |
4640 |
4280 // word by word compare, don't need alignment check |
4641 // word by word compare, don't need alignment check |
4281 bind(Lvector); |
4642 bind(Lvector); |
4282 // Shift ary1 and ary2 to the end of the arrays, negate limit |
4643 // Shift ary1 and ary2 to the end of the arrays, negate limit |
4283 add(ary1, limit, ary1); |
4644 add(ary1, limit, ary1); |
4284 add(ary2, limit, ary2); |
4645 add(ary2, limit, ary2); |
4285 neg(limit, limit); |
4646 neg(limit, limit); |
4286 |
4647 |
4287 lduw(ary1, limit, chr1); |
4648 lduw(ary1, limit, result); |
4288 bind(Lloop); |
4649 bind(Lloop); |
4289 lduw(ary2, limit, chr2); |
4650 lduw(ary2, limit, tmp); |
4290 cmp(chr1, chr2); |
4651 cmp(result, tmp); |
4291 br(Assembler::notEqual, true, Assembler::pt, Ldone); |
4652 br(Assembler::notEqual, true, Assembler::pt, Ldone); |
4292 delayed()->mov(G0, result); // not equal |
4653 delayed()->mov(G0, result); // not equal |
4293 inccc(limit, 2*sizeof(jchar)); |
4654 inccc(limit, 2*sizeof(jchar)); |
4294 // annul LDUW if branch is not taken to prevent access past end of array |
4655 // annul LDUW if branch is not taken to prevent access past end of array |
4295 br(Assembler::notZero, true, Assembler::pt, Lloop); |
4656 br(Assembler::notZero, true, Assembler::pt, Lloop); |
4296 delayed()->lduw(ary1, limit, chr1); // hoisted |
4657 delayed()->lduw(ary1, limit, result); // hoisted |
4297 |
4658 |
4298 // Caller should set it: |
4659 add(G0, 1, result); // equals |
4299 // add(G0, 1, result); // equals |
4660 bind(Ldone); |
4300 } |
4661 } |
|
4662 |
|
4663 #endif |
4301 |
4664 |
4302 // Use BIS for zeroing (count is in bytes). |
4665 // Use BIS for zeroing (count is in bytes). |
4303 void MacroAssembler::bis_zeroing(Register to, Register count, Register temp, Label& Ldone) { |
4666 void MacroAssembler::bis_zeroing(Register to, Register count, Register temp, Label& Ldone) { |
4304 assert(UseBlockZeroing && VM_Version::has_block_zeroing(), "only works with BIS zeroing"); |
4667 assert(UseBlockZeroing && VM_Version::has_block_zeroing(), "only works with BIS zeroing"); |
4305 Register end = count; |
4668 Register end = count; |