6297 NOT_LP64(shlptr(cnt,1);) // convert to number of dwords for 32-bit VM |
6300 NOT_LP64(shlptr(cnt,1);) // convert to number of dwords for 32-bit VM |
6298 rep_stos(); |
6301 rep_stos(); |
6299 } |
6302 } |
6300 } |
6303 } |
6301 |
6304 |
|
6305 #ifdef COMPILER2 |
|
6306 |
6302 // IndexOf for constant substrings with size >= 8 chars |
6307 // IndexOf for constant substrings with size >= 8 chars |
6303 // which don't need to be loaded through stack. |
6308 // which don't need to be loaded through stack. |
6304 void MacroAssembler::string_indexofC8(Register str1, Register str2, |
6309 void MacroAssembler::string_indexofC8(Register str1, Register str2, |
6305 Register cnt1, Register cnt2, |
6310 Register cnt1, Register cnt2, |
6306 int int_cnt2, Register result, |
6311 int int_cnt2, Register result, |
6307 XMMRegister vec, Register tmp) { |
6312 XMMRegister vec, Register tmp, |
|
6313 int ae) { |
6308 ShortBranchVerifier sbv(this); |
6314 ShortBranchVerifier sbv(this); |
6309 assert(UseSSE42Intrinsics, "SSE4.2 is required"); |
6315 assert(UseSSE42Intrinsics, "SSE4.2 is required"); |
6310 |
6316 assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); |
6311 // This method uses pcmpestri instruction with bound registers |
6317 |
|
6318 // This method uses the pcmpestri instruction with bound registers |
6312 // inputs: |
6319 // inputs: |
6313 // xmm - substring |
6320 // xmm - substring |
6314 // rax - substring length (elements count) |
6321 // rax - substring length (elements count) |
6315 // mem - scanned string |
6322 // mem - scanned string |
6316 // rdx - string length (elements count) |
6323 // rdx - string length (elements count) |
6317 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) |
6324 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) |
|
6325 // 0xc - mode: 1100 (substring search) + 00 (unsigned bytes) |
6318 // outputs: |
6326 // outputs: |
6319 // rcx - matched index in string |
6327 // rcx - matched index in string |
6320 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); |
6328 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); |
|
6329 int mode = (ae == StrIntrinsicNode::LL) ? 0x0c : 0x0d; // bytes or shorts |
|
6330 int stride = (ae == StrIntrinsicNode::LL) ? 16 : 8; //UU, UL -> 8 |
|
6331 Address::ScaleFactor scale1 = (ae == StrIntrinsicNode::LL) ? Address::times_1 : Address::times_2; |
|
6332 Address::ScaleFactor scale2 = (ae == StrIntrinsicNode::UL) ? Address::times_1 : scale1; |
6321 |
6333 |
6322 Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, |
6334 Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, |
6323 RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR, |
6335 RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR, |
6324 MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE; |
6336 MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE; |
6325 |
6337 |
6326 // Note, inline_string_indexOf() generates checks: |
6338 // Note, inline_string_indexOf() generates checks: |
6327 // if (substr.count > string.count) return -1; |
6339 // if (substr.count > string.count) return -1; |
6328 // if (substr.count == 0) return 0; |
6340 // if (substr.count == 0) return 0; |
6329 assert(int_cnt2 >= 8, "this code isused only for cnt2 >= 8 chars"); |
6341 assert(int_cnt2 >= stride, "this code is used only for cnt2 >= 8 chars"); |
6330 |
6342 |
6331 // Load substring. |
6343 // Load substring. |
6332 movdqu(vec, Address(str2, 0)); |
6344 if (ae == StrIntrinsicNode::UL) { |
|
6345 pmovzxbw(vec, Address(str2, 0)); |
|
6346 } else { |
|
6347 movdqu(vec, Address(str2, 0)); |
|
6348 } |
6333 movl(cnt2, int_cnt2); |
6349 movl(cnt2, int_cnt2); |
6334 movptr(result, str1); // string addr |
6350 movptr(result, str1); // string addr |
6335 |
6351 |
6336 if (int_cnt2 > 8) { |
6352 if (int_cnt2 > stride) { |
6337 jmpb(SCAN_TO_SUBSTR); |
6353 jmpb(SCAN_TO_SUBSTR); |
6338 |
6354 |
6339 // Reload substr for rescan, this code |
6355 // Reload substr for rescan, this code |
6340 // is executed only for large substrings (> 8 chars) |
6356 // is executed only for large substrings (> 8 chars) |
6341 bind(RELOAD_SUBSTR); |
6357 bind(RELOAD_SUBSTR); |
6342 movdqu(vec, Address(str2, 0)); |
6358 if (ae == StrIntrinsicNode::UL) { |
|
6359 pmovzxbw(vec, Address(str2, 0)); |
|
6360 } else { |
|
6361 movdqu(vec, Address(str2, 0)); |
|
6362 } |
6343 negptr(cnt2); // Jumped here with negative cnt2, convert to positive |
6363 negptr(cnt2); // Jumped here with negative cnt2, convert to positive |
6344 |
6364 |
6345 bind(RELOAD_STR); |
6365 bind(RELOAD_STR); |
6346 // We came here after the beginning of the substring was |
6366 // We came here after the beginning of the substring was |
6347 // matched but the rest of it was not so we need to search |
6367 // matched but the rest of it was not so we need to search |
6356 |
6376 |
6357 decrementl(cnt1); // Shift to next element |
6377 decrementl(cnt1); // Shift to next element |
6358 cmpl(cnt1, cnt2); |
6378 cmpl(cnt1, cnt2); |
6359 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring |
6379 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring |
6360 |
6380 |
6361 addptr(result, 2); |
6381 addptr(result, (1<<scale1)); |
6362 |
6382 |
6363 } // (int_cnt2 > 8) |
6383 } // (int_cnt2 > 8) |
6364 |
6384 |
6365 // Scan string for start of substr in 16-byte vectors |
6385 // Scan string for start of substr in 16-byte vectors |
6366 bind(SCAN_TO_SUBSTR); |
6386 bind(SCAN_TO_SUBSTR); |
6367 pcmpestri(vec, Address(result, 0), 0x0d); |
6387 pcmpestri(vec, Address(result, 0), mode); |
6368 jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1 |
6388 jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1 |
6369 subl(cnt1, 8); |
6389 subl(cnt1, stride); |
6370 jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string |
6390 jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string |
6371 cmpl(cnt1, cnt2); |
6391 cmpl(cnt1, cnt2); |
6372 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring |
6392 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring |
6373 addptr(result, 16); |
6393 addptr(result, 16); |
6374 jmpb(SCAN_TO_SUBSTR); |
6394 jmpb(SCAN_TO_SUBSTR); |
6375 |
6395 |
6376 // Found a potential substr |
6396 // Found a potential substr |
6377 bind(FOUND_CANDIDATE); |
6397 bind(FOUND_CANDIDATE); |
6378 // Matched whole vector if first element matched (tmp(rcx) == 0). |
6398 // Matched whole vector if first element matched (tmp(rcx) == 0). |
6379 if (int_cnt2 == 8) { |
6399 if (int_cnt2 == stride) { |
6380 jccb(Assembler::overflow, RET_FOUND); // OF == 1 |
6400 jccb(Assembler::overflow, RET_FOUND); // OF == 1 |
6381 } else { // int_cnt2 > 8 |
6401 } else { // int_cnt2 > 8 |
6382 jccb(Assembler::overflow, FOUND_SUBSTR); |
6402 jccb(Assembler::overflow, FOUND_SUBSTR); |
6383 } |
6403 } |
6384 // After pcmpestri tmp(rcx) contains matched element index |
6404 // After pcmpestri tmp(rcx) contains matched element index |
6385 // Compute start addr of substr |
6405 // Compute start addr of substr |
6386 lea(result, Address(result, tmp, Address::times_2)); |
6406 lea(result, Address(result, tmp, scale1)); |
6387 |
6407 |
6388 // Make sure string is still long enough |
6408 // Make sure string is still long enough |
6389 subl(cnt1, tmp); |
6409 subl(cnt1, tmp); |
6390 cmpl(cnt1, cnt2); |
6410 cmpl(cnt1, cnt2); |
6391 if (int_cnt2 == 8) { |
6411 if (int_cnt2 == stride) { |
6392 jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR); |
6412 jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR); |
6393 } else { // int_cnt2 > 8 |
6413 } else { // int_cnt2 > 8 |
6394 jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD); |
6414 jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD); |
6395 } |
6415 } |
6396 // Left less than substring. |
6416 // Left less than substring. |
6397 |
6417 |
6398 bind(RET_NOT_FOUND); |
6418 bind(RET_NOT_FOUND); |
6399 movl(result, -1); |
6419 movl(result, -1); |
6400 jmpb(EXIT); |
6420 jmpb(EXIT); |
6401 |
6421 |
6402 if (int_cnt2 > 8) { |
6422 if (int_cnt2 > stride) { |
6403 // This code is optimized for the case when whole substring |
6423 // This code is optimized for the case when whole substring |
6404 // is matched if its head is matched. |
6424 // is matched if its head is matched. |
6405 bind(MATCH_SUBSTR_HEAD); |
6425 bind(MATCH_SUBSTR_HEAD); |
6406 pcmpestri(vec, Address(result, 0), 0x0d); |
6426 pcmpestri(vec, Address(result, 0), mode); |
6407 // Reload only the string if it does not match |
6427 // Reload only the string if it does not match |
6408 jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0 |
6428 jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0 |
6409 |
6429 |
6410 Label CONT_SCAN_SUBSTR; |
6430 Label CONT_SCAN_SUBSTR; |
6411 // Compare the rest of substring (> 8 chars). |
6431 // Compare the rest of substring (> 8 chars). |
6412 bind(FOUND_SUBSTR); |
6432 bind(FOUND_SUBSTR); |
6413 // First 8 chars are already matched. |
6433 // First 8 chars are already matched. |
6414 negptr(cnt2); |
6434 negptr(cnt2); |
6415 addptr(cnt2, 8); |
6435 addptr(cnt2, stride); |
6416 |
6436 |
6417 bind(SCAN_SUBSTR); |
6437 bind(SCAN_SUBSTR); |
6418 subl(cnt1, 8); |
6438 subl(cnt1, stride); |
6419 cmpl(cnt2, -8); // Do not read beyond substring |
6439 cmpl(cnt2, -stride); // Do not read beyond substring |
6420 jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR); |
6440 jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR); |
6421 // Back-up strings to avoid reading beyond substring: |
6441 // Back-up strings to avoid reading beyond substring: |
6422 // cnt1 = cnt1 - cnt2 + 8 |
6442 // cnt1 = cnt1 - cnt2 + 8 |
6423 addl(cnt1, cnt2); // cnt2 is negative |
6443 addl(cnt1, cnt2); // cnt2 is negative |
6424 addl(cnt1, 8); |
6444 addl(cnt1, stride); |
6425 movl(cnt2, 8); negptr(cnt2); |
6445 movl(cnt2, stride); negptr(cnt2); |
6426 bind(CONT_SCAN_SUBSTR); |
6446 bind(CONT_SCAN_SUBSTR); |
6427 if (int_cnt2 < (int)G) { |
6447 if (int_cnt2 < (int)G) { |
6428 movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2)); |
6448 int tail_off1 = int_cnt2<<scale1; |
6429 pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d); |
6449 int tail_off2 = int_cnt2<<scale2; |
|
6450 if (ae == StrIntrinsicNode::UL) { |
|
6451 pmovzxbw(vec, Address(str2, cnt2, scale2, tail_off2)); |
|
6452 } else { |
|
6453 movdqu(vec, Address(str2, cnt2, scale2, tail_off2)); |
|
6454 } |
|
6455 pcmpestri(vec, Address(result, cnt2, scale1, tail_off1), mode); |
6430 } else { |
6456 } else { |
6431 // calculate index in register to avoid integer overflow (int_cnt2*2) |
6457 // calculate index in register to avoid integer overflow (int_cnt2*2) |
6432 movl(tmp, int_cnt2); |
6458 movl(tmp, int_cnt2); |
6433 addptr(tmp, cnt2); |
6459 addptr(tmp, cnt2); |
6434 movdqu(vec, Address(str2, tmp, Address::times_2, 0)); |
6460 if (ae == StrIntrinsicNode::UL) { |
6435 pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d); |
6461 pmovzxbw(vec, Address(str2, tmp, scale2, 0)); |
|
6462 } else { |
|
6463 movdqu(vec, Address(str2, tmp, scale2, 0)); |
|
6464 } |
|
6465 pcmpestri(vec, Address(result, tmp, scale1, 0), mode); |
6436 } |
6466 } |
6437 // Need to reload string pointers if the whole vector did not match |
6467 // Need to reload string pointers if the whole vector did not match |
6438 jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0 |
6468 jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0 |
6439 addptr(cnt2, 8); |
6469 addptr(cnt2, stride); |
6440 jcc(Assembler::negative, SCAN_SUBSTR); |
6470 jcc(Assembler::negative, SCAN_SUBSTR); |
6441 // Fall through if found full substring |
6471 // Fall through if found full substring |
6442 |
6472 |
6443 } // (int_cnt2 > 8) |
6473 } // (int_cnt2 > 8) |
6444 |
6474 |
6445 bind(RET_FOUND); |
6475 bind(RET_FOUND); |
6446 // Found result if we matched full small substring. |
6476 // Found result if we matched full small substring. |
6447 // Compute substr offset |
6477 // Compute substr offset |
6448 subptr(result, str1); |
6478 subptr(result, str1); |
6449 shrl(result, 1); // index |
6479 if (ae == StrIntrinsicNode::UU || ae == StrIntrinsicNode::UL) { |
|
6480 shrl(result, 1); // index |
|
6481 } |
6450 bind(EXIT); |
6482 bind(EXIT); |
6451 |
6483 |
6452 } // string_indexofC8 |
6484 } // string_indexofC8 |
6453 |
6485 |
6454 // Small strings are loaded through stack if they cross page boundary. |
6486 // Small strings are loaded through stack if they cross page boundary. |
6455 void MacroAssembler::string_indexof(Register str1, Register str2, |
6487 void MacroAssembler::string_indexof(Register str1, Register str2, |
6456 Register cnt1, Register cnt2, |
6488 Register cnt1, Register cnt2, |
6457 int int_cnt2, Register result, |
6489 int int_cnt2, Register result, |
6458 XMMRegister vec, Register tmp) { |
6490 XMMRegister vec, Register tmp, |
|
6491 int ae) { |
6459 ShortBranchVerifier sbv(this); |
6492 ShortBranchVerifier sbv(this); |
6460 assert(UseSSE42Intrinsics, "SSE4.2 is required"); |
6493 assert(UseSSE42Intrinsics, "SSE4.2 is required"); |
|
6494 assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); |
|
6495 |
6461 // |
6496 // |
6462 // int_cnt2 is length of small (< 8 chars) constant substring |
6497 // int_cnt2 is length of small (< 8 chars) constant substring |
6463 // or (-1) for non constant substring in which case its length |
6498 // or (-1) for non constant substring in which case its length |
6464 // is in cnt2 register. |
6499 // is in cnt2 register. |
6465 // |
6500 // |
6466 // Note, inline_string_indexOf() generates checks: |
6501 // Note, inline_string_indexOf() generates checks: |
6467 // if (substr.count > string.count) return -1; |
6502 // if (substr.count > string.count) return -1; |
6468 // if (substr.count == 0) return 0; |
6503 // if (substr.count == 0) return 0; |
6469 // |
6504 // |
6470 assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0"); |
6505 int stride = (ae == StrIntrinsicNode::LL) ? 16 : 8; //UU, UL -> 8 |
6471 |
6506 assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < stride), "should be != 0"); |
6472 // This method uses pcmpestri instruction with bound registers |
6507 // This method uses the pcmpestri instruction with bound registers |
6473 // inputs: |
6508 // inputs: |
6474 // xmm - substring |
6509 // xmm - substring |
6475 // rax - substring length (elements count) |
6510 // rax - substring length (elements count) |
6476 // mem - scanned string |
6511 // mem - scanned string |
6477 // rdx - string length (elements count) |
6512 // rdx - string length (elements count) |
6478 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) |
6513 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) |
|
6514 // 0xc - mode: 1100 (substring search) + 00 (unsigned bytes) |
6479 // outputs: |
6515 // outputs: |
6480 // rcx - matched index in string |
6516 // rcx - matched index in string |
6481 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); |
6517 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); |
|
6518 int mode = (ae == StrIntrinsicNode::LL) ? 0x0c : 0x0d; // bytes or shorts |
|
6519 Address::ScaleFactor scale1 = (ae == StrIntrinsicNode::LL) ? Address::times_1 : Address::times_2; |
|
6520 Address::ScaleFactor scale2 = (ae == StrIntrinsicNode::UL) ? Address::times_1 : scale1; |
6482 |
6521 |
6483 Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR, |
6522 Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR, |
6484 RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR, |
6523 RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR, |
6485 FOUND_CANDIDATE; |
6524 FOUND_CANDIDATE; |
6486 |
6525 |
6490 Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR; |
6529 Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR; |
6491 |
6530 |
6492 movptr(tmp, rsp); // save old SP |
6531 movptr(tmp, rsp); // save old SP |
6493 |
6532 |
6494 if (int_cnt2 > 0) { // small (< 8 chars) constant substring |
6533 if (int_cnt2 > 0) { // small (< 8 chars) constant substring |
6495 if (int_cnt2 == 1) { // One char |
6534 if (int_cnt2 == (1>>scale2)) { // One byte |
|
6535 assert((ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL), "Only possible for latin1 encoding"); |
|
6536 load_unsigned_byte(result, Address(str2, 0)); |
|
6537 movdl(vec, result); // move 32 bits |
|
6538 } else if (ae == StrIntrinsicNode::LL && int_cnt2 == 3) { // Three bytes |
|
6539 // Not enough header space in 32-bit VM: 12+3 = 15. |
|
6540 movl(result, Address(str2, -1)); |
|
6541 shrl(result, 8); |
|
6542 movdl(vec, result); // move 32 bits |
|
6543 } else if (ae != StrIntrinsicNode::UL && int_cnt2 == (2>>scale2)) { // One char |
6496 load_unsigned_short(result, Address(str2, 0)); |
6544 load_unsigned_short(result, Address(str2, 0)); |
6497 movdl(vec, result); // move 32 bits |
6545 movdl(vec, result); // move 32 bits |
6498 } else if (int_cnt2 == 2) { // Two chars |
6546 } else if (ae != StrIntrinsicNode::UL && int_cnt2 == (4>>scale2)) { // Two chars |
6499 movdl(vec, Address(str2, 0)); // move 32 bits |
6547 movdl(vec, Address(str2, 0)); // move 32 bits |
6500 } else if (int_cnt2 == 4) { // Four chars |
6548 } else if (ae != StrIntrinsicNode::UL && int_cnt2 == (8>>scale2)) { // Four chars |
6501 movq(vec, Address(str2, 0)); // move 64 bits |
6549 movq(vec, Address(str2, 0)); // move 64 bits |
6502 } else { // cnt2 = { 3, 5, 6, 7 } |
6550 } else { // cnt2 = { 3, 5, 6, 7 } || (ae == StrIntrinsicNode::UL && cnt2 ={2, ..., 7}) |
6503 // Array header size is 12 bytes in 32-bit VM |
6551 // Array header size is 12 bytes in 32-bit VM |
6504 // + 6 bytes for 3 chars == 18 bytes, |
6552 // + 6 bytes for 3 chars == 18 bytes, |
6505 // enough space to load vec and shift. |
6553 // enough space to load vec and shift. |
6506 assert(HeapWordSize*TypeArrayKlass::header_size() >= 12,"sanity"); |
6554 assert(HeapWordSize*TypeArrayKlass::header_size() >= 12,"sanity"); |
6507 movdqu(vec, Address(str2, (int_cnt2*2)-16)); |
6555 if (ae == StrIntrinsicNode::UL) { |
6508 psrldq(vec, 16-(int_cnt2*2)); |
6556 int tail_off = int_cnt2-8; |
|
6557 pmovzxbw(vec, Address(str2, tail_off)); |
|
6558 psrldq(vec, -2*tail_off); |
|
6559 } |
|
6560 else { |
|
6561 int tail_off = int_cnt2*(1<<scale2); |
|
6562 movdqu(vec, Address(str2, tail_off-16)); |
|
6563 psrldq(vec, 16-tail_off); |
|
6564 } |
6509 } |
6565 } |
6510 } else { // not constant substring |
6566 } else { // not constant substring |
6511 cmpl(cnt2, 8); |
6567 cmpl(cnt2, stride); |
6512 jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough |
6568 jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough |
6513 |
6569 |
6514 // We can read beyond string if str+16 does not cross page boundary |
6570 // We can read beyond string if str+16 does not cross page boundary |
6515 // since heaps are aligned and mapped by pages. |
6571 // since heaps are aligned and mapped by pages. |
6516 assert(os::vm_page_size() < (int)G, "default page should be small"); |
6572 assert(os::vm_page_size() < (int)G, "default page should be small"); |
6519 cmpl(result, (os::vm_page_size()-16)); |
6575 cmpl(result, (os::vm_page_size()-16)); |
6520 jccb(Assembler::belowEqual, CHECK_STR); |
6576 jccb(Assembler::belowEqual, CHECK_STR); |
6521 |
6577 |
6522 // Move small strings to stack to allow load 16 bytes into vec. |
6578 // Move small strings to stack to allow load 16 bytes into vec. |
6523 subptr(rsp, 16); |
6579 subptr(rsp, 16); |
6524 int stk_offset = wordSize-2; |
6580 int stk_offset = wordSize-(1<<scale2); |
6525 push(cnt2); |
6581 push(cnt2); |
6526 |
6582 |
6527 bind(COPY_SUBSTR); |
6583 bind(COPY_SUBSTR); |
6528 load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2)); |
6584 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL) { |
6529 movw(Address(rsp, cnt2, Address::times_2, stk_offset), result); |
6585 load_unsigned_byte(result, Address(str2, cnt2, scale2, -1)); |
|
6586 movb(Address(rsp, cnt2, scale2, stk_offset), result); |
|
6587 } else if (ae == StrIntrinsicNode::UU) { |
|
6588 load_unsigned_short(result, Address(str2, cnt2, scale2, -2)); |
|
6589 movw(Address(rsp, cnt2, scale2, stk_offset), result); |
|
6590 } |
6530 decrement(cnt2); |
6591 decrement(cnt2); |
6531 jccb(Assembler::notZero, COPY_SUBSTR); |
6592 jccb(Assembler::notZero, COPY_SUBSTR); |
6532 |
6593 |
6533 pop(cnt2); |
6594 pop(cnt2); |
6534 movptr(str2, rsp); // New substring address |
6595 movptr(str2, rsp); // New substring address |
6535 } // non constant |
6596 } // non constant |
6536 |
6597 |
6537 bind(CHECK_STR); |
6598 bind(CHECK_STR); |
6538 cmpl(cnt1, 8); |
6599 cmpl(cnt1, stride); |
6539 jccb(Assembler::aboveEqual, BIG_STRINGS); |
6600 jccb(Assembler::aboveEqual, BIG_STRINGS); |
6540 |
6601 |
6541 // Check cross page boundary. |
6602 // Check cross page boundary. |
6542 movl(result, str1); // We need only low 32 bits |
6603 movl(result, str1); // We need only low 32 bits |
6543 andl(result, (os::vm_page_size()-1)); |
6604 andl(result, (os::vm_page_size()-1)); |
6544 cmpl(result, (os::vm_page_size()-16)); |
6605 cmpl(result, (os::vm_page_size()-16)); |
6545 jccb(Assembler::belowEqual, BIG_STRINGS); |
6606 jccb(Assembler::belowEqual, BIG_STRINGS); |
6546 |
6607 |
6547 subptr(rsp, 16); |
6608 subptr(rsp, 16); |
6548 int stk_offset = -2; |
6609 int stk_offset = -(1<<scale1); |
6549 if (int_cnt2 < 0) { // not constant |
6610 if (int_cnt2 < 0) { // not constant |
6550 push(cnt2); |
6611 push(cnt2); |
6551 stk_offset += wordSize; |
6612 stk_offset += wordSize; |
6552 } |
6613 } |
6553 movl(cnt2, cnt1); |
6614 movl(cnt2, cnt1); |
6554 |
6615 |
6555 bind(COPY_STR); |
6616 bind(COPY_STR); |
6556 load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2)); |
6617 if (ae == StrIntrinsicNode::LL) { |
6557 movw(Address(rsp, cnt2, Address::times_2, stk_offset), result); |
6618 load_unsigned_byte(result, Address(str1, cnt2, scale1, -1)); |
|
6619 movb(Address(rsp, cnt2, scale1, stk_offset), result); |
|
6620 } else { |
|
6621 load_unsigned_short(result, Address(str1, cnt2, scale1, -2)); |
|
6622 movw(Address(rsp, cnt2, scale1, stk_offset), result); |
|
6623 } |
6558 decrement(cnt2); |
6624 decrement(cnt2); |
6559 jccb(Assembler::notZero, COPY_STR); |
6625 jccb(Assembler::notZero, COPY_STR); |
6560 |
6626 |
6561 if (int_cnt2 < 0) { // not constant |
6627 if (int_cnt2 < 0) { // not constant |
6562 pop(cnt2); |
6628 pop(cnt2); |
6595 // Reload substr for rescan, this code |
6665 // Reload substr for rescan, this code |
6596 // is executed only for large substrings (> 8 chars) |
6666 // is executed only for large substrings (> 8 chars) |
6597 bind(RELOAD_SUBSTR); |
6667 bind(RELOAD_SUBSTR); |
6598 movptr(str2, Address(rsp, 2*wordSize)); |
6668 movptr(str2, Address(rsp, 2*wordSize)); |
6599 movl(cnt2, Address(rsp, 3*wordSize)); |
6669 movl(cnt2, Address(rsp, 3*wordSize)); |
6600 movdqu(vec, Address(str2, 0)); |
6670 if (ae == StrIntrinsicNode::UL) { |
|
6671 pmovzxbw(vec, Address(str2, 0)); |
|
6672 } else { |
|
6673 movdqu(vec, Address(str2, 0)); |
|
6674 } |
6601 // We came here after the beginning of the substring was |
6675 // We came here after the beginning of the substring was |
6602 // matched but the rest of it was not so we need to search |
6676 // matched but the rest of it was not so we need to search |
6603 // again. Start from the next element after the previous match. |
6677 // again. Start from the next element after the previous match. |
6604 subptr(str1, result); // Restore counter |
6678 subptr(str1, result); // Restore counter |
6605 shrl(str1, 1); |
6679 if (ae == StrIntrinsicNode::UU || ae == StrIntrinsicNode::UL) { |
|
6680 shrl(str1, 1); |
|
6681 } |
6606 addl(cnt1, str1); |
6682 addl(cnt1, str1); |
6607 decrementl(cnt1); // Shift to next element |
6683 decrementl(cnt1); // Shift to next element |
6608 cmpl(cnt1, cnt2); |
6684 cmpl(cnt1, cnt2); |
6609 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring |
6685 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring |
6610 |
6686 |
6611 addptr(result, 2); |
6687 addptr(result, (1<<scale1)); |
6612 } // non constant |
6688 } // non constant |
6613 |
6689 |
6614 // Scan string for start of substr in 16-byte vectors |
6690 // Scan string for start of substr in 16-byte vectors |
6615 bind(SCAN_TO_SUBSTR); |
6691 bind(SCAN_TO_SUBSTR); |
6616 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); |
6692 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); |
6617 pcmpestri(vec, Address(result, 0), 0x0d); |
6693 pcmpestri(vec, Address(result, 0), mode); |
6618 jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1 |
6694 jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1 |
6619 subl(cnt1, 8); |
6695 subl(cnt1, stride); |
6620 jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string |
6696 jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string |
6621 cmpl(cnt1, cnt2); |
6697 cmpl(cnt1, cnt2); |
6622 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring |
6698 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring |
6623 addptr(result, 16); |
6699 addptr(result, 16); |
6624 |
6700 |
6625 bind(ADJUST_STR); |
6701 bind(ADJUST_STR); |
6626 cmpl(cnt1, 8); // Do not read beyond string |
6702 cmpl(cnt1, stride); // Do not read beyond string |
6627 jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR); |
6703 jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR); |
6628 // Back-up string to avoid reading beyond string. |
6704 // Back-up string to avoid reading beyond string. |
6629 lea(result, Address(result, cnt1, Address::times_2, -16)); |
6705 lea(result, Address(result, cnt1, scale1, -16)); |
6630 movl(cnt1, 8); |
6706 movl(cnt1, stride); |
6631 jmpb(SCAN_TO_SUBSTR); |
6707 jmpb(SCAN_TO_SUBSTR); |
6632 |
6708 |
6633 // Found a potential substr |
6709 // Found a potential substr |
6634 bind(FOUND_CANDIDATE); |
6710 bind(FOUND_CANDIDATE); |
6635 // After pcmpestri tmp(rcx) contains matched element index |
6711 // After pcmpestri tmp(rcx) contains matched element index |
6676 cmpl(tmp, cnt2); |
6751 cmpl(tmp, cnt2); |
6677 // First 8 chars are already matched. |
6752 // First 8 chars are already matched. |
6678 jccb(Assembler::equal, CHECK_NEXT); |
6753 jccb(Assembler::equal, CHECK_NEXT); |
6679 |
6754 |
6680 bind(SCAN_SUBSTR); |
6755 bind(SCAN_SUBSTR); |
6681 pcmpestri(vec, Address(str1, 0), 0x0d); |
6756 pcmpestri(vec, Address(str1, 0), mode); |
6682 // Need to reload string pointers if the whole vector did not match |
6757 // Need to reload string pointers if the whole vector did not match |
6683 jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0 |
6758 jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0 |
6684 |
6759 |
6685 bind(CHECK_NEXT); |
6760 bind(CHECK_NEXT); |
6686 subl(cnt2, 8); |
6761 subl(cnt2, stride); |
6687 jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring |
6762 jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring |
6688 addptr(str1, 16); |
6763 addptr(str1, 16); |
6689 addptr(str2, 16); |
6764 if (ae == StrIntrinsicNode::UL) { |
6690 subl(cnt1, 8); |
6765 addptr(str2, 8); |
6691 cmpl(cnt2, 8); // Do not read beyond substring |
6766 } else { |
|
6767 addptr(str2, 16); |
|
6768 } |
|
6769 subl(cnt1, stride); |
|
6770 cmpl(cnt2, stride); // Do not read beyond substring |
6692 jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR); |
6771 jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR); |
6693 // Back-up strings to avoid reading beyond substring. |
6772 // Back-up strings to avoid reading beyond substring. |
6694 lea(str2, Address(str2, cnt2, Address::times_2, -16)); |
6773 |
6695 lea(str1, Address(str1, cnt2, Address::times_2, -16)); |
6774 if (ae == StrIntrinsicNode::UL) { |
|
6775 lea(str2, Address(str2, cnt2, scale2, -8)); |
|
6776 lea(str1, Address(str1, cnt2, scale1, -16)); |
|
6777 } else { |
|
6778 lea(str2, Address(str2, cnt2, scale2, -16)); |
|
6779 lea(str1, Address(str1, cnt2, scale1, -16)); |
|
6780 } |
6696 subl(cnt1, cnt2); |
6781 subl(cnt1, cnt2); |
6697 movl(cnt2, 8); |
6782 movl(cnt2, stride); |
6698 addl(cnt1, 8); |
6783 addl(cnt1, stride); |
6699 bind(CONT_SCAN_SUBSTR); |
6784 bind(CONT_SCAN_SUBSTR); |
6700 movdqu(vec, Address(str2, 0)); |
6785 if (ae == StrIntrinsicNode::UL) { |
|
6786 pmovzxbw(vec, Address(str2, 0)); |
|
6787 } else { |
|
6788 movdqu(vec, Address(str2, 0)); |
|
6789 } |
6701 jmpb(SCAN_SUBSTR); |
6790 jmpb(SCAN_SUBSTR); |
6702 |
6791 |
6703 bind(RET_FOUND_LONG); |
6792 bind(RET_FOUND_LONG); |
6704 movptr(str1, Address(rsp, wordSize)); |
6793 movptr(str1, Address(rsp, wordSize)); |
6705 } // non constant |
6794 } // non constant |
6706 |
6795 |
6707 bind(RET_FOUND); |
6796 bind(RET_FOUND); |
6708 // Compute substr offset |
6797 // Compute substr offset |
6709 subptr(result, str1); |
6798 subptr(result, str1); |
6710 shrl(result, 1); // index |
6799 if (ae == StrIntrinsicNode::UU || ae == StrIntrinsicNode::UL) { |
6711 |
6800 shrl(result, 1); // index |
|
6801 } |
6712 bind(CLEANUP); |
6802 bind(CLEANUP); |
6713 pop(rsp); // restore SP |
6803 pop(rsp); // restore SP |
6714 |
6804 |
6715 } // string_indexof |
6805 } // string_indexof |
6716 |
6806 |
6717 // Compare strings. |
6807 void MacroAssembler::string_indexof_char(Register str1, Register cnt1, Register ch, Register result, |
|
6808 XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp) { |
|
6809 ShortBranchVerifier sbv(this); |
|
6810 assert(UseSSE42Intrinsics, "SSE4.2 is required"); |
|
6811 |
|
6812 int stride = 8; |
|
6813 |
|
6814 Label FOUND_CHAR, SCAN_TO_CHAR, SCAN_TO_CHAR_LOOP, |
|
6815 SCAN_TO_8_CHAR, SCAN_TO_8_CHAR_LOOP, SCAN_TO_16_CHAR_LOOP, |
|
6816 RET_NOT_FOUND, SCAN_TO_8_CHAR_INIT, |
|
6817 FOUND_SEQ_CHAR, DONE_LABEL; |
|
6818 |
|
6819 movptr(result, str1); |
|
6820 if (UseAVX >= 2) { |
|
6821 cmpl(cnt1, stride); |
|
6822 jccb(Assembler::less, SCAN_TO_CHAR_LOOP); |
|
6823 cmpl(cnt1, 2*stride); |
|
6824 jccb(Assembler::less, SCAN_TO_8_CHAR_INIT); |
|
6825 movdl(vec1, ch); |
|
6826 vpbroadcastw(vec1, vec1); |
|
6827 vpxor(vec2, vec2); |
|
6828 movl(tmp, cnt1); |
|
6829 andl(tmp, 0xFFFFFFF0); //vector count (in chars) |
|
6830 andl(cnt1,0x0000000F); //tail count (in chars) |
|
6831 |
|
6832 bind(SCAN_TO_16_CHAR_LOOP); |
|
6833 vmovdqu(vec3, Address(result, 0)); |
|
6834 vpcmpeqw(vec3, vec3, vec1, true); |
|
6835 vptest(vec2, vec3); |
|
6836 jcc(Assembler::carryClear, FOUND_CHAR); |
|
6837 addptr(result, 32); |
|
6838 subl(tmp, 2*stride); |
|
6839 jccb(Assembler::notZero, SCAN_TO_16_CHAR_LOOP); |
|
6840 jmp(SCAN_TO_8_CHAR); |
|
6841 bind(SCAN_TO_8_CHAR_INIT); |
|
6842 movdl(vec1, ch); |
|
6843 pshuflw(vec1, vec1, 0x00); |
|
6844 pshufd(vec1, vec1, 0); |
|
6845 pxor(vec2, vec2); |
|
6846 } |
|
6847 if (UseAVX >= 2 || UseSSE42Intrinsics) { |
|
6848 bind(SCAN_TO_8_CHAR); |
|
6849 cmpl(cnt1, stride); |
|
6850 if (UseAVX >= 2) { |
|
6851 jccb(Assembler::less, SCAN_TO_CHAR); |
|
6852 } |
|
6853 if (!(UseAVX >= 2)) { |
|
6854 jccb(Assembler::less, SCAN_TO_CHAR_LOOP); |
|
6855 movdl(vec1, ch); |
|
6856 pshuflw(vec1, vec1, 0x00); |
|
6857 pshufd(vec1, vec1, 0); |
|
6858 pxor(vec2, vec2); |
|
6859 } |
|
6860 movl(tmp, cnt1); |
|
6861 andl(tmp, 0xFFFFFFF8); //vector count (in chars) |
|
6862 andl(cnt1,0x00000007); //tail count (in chars) |
|
6863 |
|
6864 bind(SCAN_TO_8_CHAR_LOOP); |
|
6865 movdqu(vec3, Address(result, 0)); |
|
6866 pcmpeqw(vec3, vec1); |
|
6867 ptest(vec2, vec3); |
|
6868 jcc(Assembler::carryClear, FOUND_CHAR); |
|
6869 addptr(result, 16); |
|
6870 subl(tmp, stride); |
|
6871 jccb(Assembler::notZero, SCAN_TO_8_CHAR_LOOP); |
|
6872 } |
|
6873 bind(SCAN_TO_CHAR); |
|
6874 testl(cnt1, cnt1); |
|
6875 jcc(Assembler::zero, RET_NOT_FOUND); |
|
6876 |
|
6877 bind(SCAN_TO_CHAR_LOOP); |
|
6878 load_unsigned_short(tmp, Address(result, 0)); |
|
6879 cmpl(ch, tmp); |
|
6880 jccb(Assembler::equal, FOUND_SEQ_CHAR); |
|
6881 addptr(result, 2); |
|
6882 subl(cnt1, 1); |
|
6883 jccb(Assembler::zero, RET_NOT_FOUND); |
|
6884 jmp(SCAN_TO_CHAR_LOOP); |
|
6885 |
|
6886 bind(RET_NOT_FOUND); |
|
6887 movl(result, -1); |
|
6888 jmpb(DONE_LABEL); |
|
6889 |
|
6890 if (UseAVX >= 2 || UseSSE42Intrinsics) { |
|
6891 bind(FOUND_CHAR); |
|
6892 if (UseAVX >= 2) { |
|
6893 vpmovmskb(tmp, vec3); |
|
6894 } else { |
|
6895 pmovmskb(tmp, vec3); |
|
6896 } |
|
6897 bsfl(ch, tmp); |
|
6898 addl(result, ch); |
|
6899 } |
|
6900 |
|
6901 bind(FOUND_SEQ_CHAR); |
|
6902 subptr(result, str1); |
|
6903 shrl(result, 1); |
|
6904 |
|
6905 bind(DONE_LABEL); |
|
6906 } // string_indexof_char |
|
6907 |
|
6908 // helper function for string_compare |
|
6909 void MacroAssembler::load_next_elements(Register elem1, Register elem2, Register str1, Register str2, |
|
6910 Address::ScaleFactor scale, Address::ScaleFactor scale1, |
|
6911 Address::ScaleFactor scale2, Register index, int ae) { |
|
6912 if (ae == StrIntrinsicNode::LL) { |
|
6913 load_unsigned_byte(elem1, Address(str1, index, scale, 0)); |
|
6914 load_unsigned_byte(elem2, Address(str2, index, scale, 0)); |
|
6915 } else if (ae == StrIntrinsicNode::UU) { |
|
6916 load_unsigned_short(elem1, Address(str1, index, scale, 0)); |
|
6917 load_unsigned_short(elem2, Address(str2, index, scale, 0)); |
|
6918 } else { |
|
6919 load_unsigned_byte(elem1, Address(str1, index, scale1, 0)); |
|
6920 load_unsigned_short(elem2, Address(str2, index, scale2, 0)); |
|
6921 } |
|
6922 } |
|
6923 |
|
6924 // Compare strings, used for char[] and byte[]. |
6718 void MacroAssembler::string_compare(Register str1, Register str2, |
6925 void MacroAssembler::string_compare(Register str1, Register str2, |
6719 Register cnt1, Register cnt2, Register result, |
6926 Register cnt1, Register cnt2, Register result, |
6720 XMMRegister vec1) { |
6927 XMMRegister vec1, int ae) { |
6721 ShortBranchVerifier sbv(this); |
6928 ShortBranchVerifier sbv(this); |
6722 Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL; |
6929 Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL; |
6723 |
6930 int stride, stride2, adr_stride, adr_stride1, adr_stride2; |
|
6931 Address::ScaleFactor scale, scale1, scale2; |
|
6932 |
|
6933 if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) { |
|
6934 shrl(cnt2, 1); |
|
6935 } |
6724 // Compute the minimum of the string lengths and the |
6936 // Compute the minimum of the string lengths and the |
6725 // difference of the string lengths (stack). |
6937 // difference of the string lengths (stack). |
6726 // Do the conditional move stuff |
6938 // Do the conditional move stuff |
6727 movl(result, cnt1); |
6939 movl(result, cnt1); |
6728 subl(cnt1, cnt2); |
6940 subl(cnt1, cnt2); |
6730 cmov32(Assembler::lessEqual, cnt2, result); |
6942 cmov32(Assembler::lessEqual, cnt2, result); |
6731 |
6943 |
6732 // Is the minimum length zero? |
6944 // Is the minimum length zero? |
6733 testl(cnt2, cnt2); |
6945 testl(cnt2, cnt2); |
6734 jcc(Assembler::zero, LENGTH_DIFF_LABEL); |
6946 jcc(Assembler::zero, LENGTH_DIFF_LABEL); |
6735 |
6947 if (ae == StrIntrinsicNode::LL) { |
6736 // Compare first characters |
6948 // Load first bytes |
6737 load_unsigned_short(result, Address(str1, 0)); |
6949 load_unsigned_byte(result, Address(str1, 0)); |
6738 load_unsigned_short(cnt1, Address(str2, 0)); |
6950 load_unsigned_byte(cnt1, Address(str2, 0)); |
|
6951 } else if (ae == StrIntrinsicNode::UU) { |
|
6952 // Load first characters |
|
6953 load_unsigned_short(result, Address(str1, 0)); |
|
6954 load_unsigned_short(cnt1, Address(str2, 0)); |
|
6955 } else { |
|
6956 load_unsigned_byte(result, Address(str1, 0)); |
|
6957 load_unsigned_short(cnt1, Address(str2, 0)); |
|
6958 } |
6739 subl(result, cnt1); |
6959 subl(result, cnt1); |
6740 jcc(Assembler::notZero, POP_LABEL); |
6960 jcc(Assembler::notZero, POP_LABEL); |
|
6961 |
|
6962 if (ae == StrIntrinsicNode::UU) { |
|
6963 // Divide length by 2 to get number of chars |
|
6964 shrl(cnt2, 1); |
|
6965 } |
6741 cmpl(cnt2, 1); |
6966 cmpl(cnt2, 1); |
6742 jcc(Assembler::equal, LENGTH_DIFF_LABEL); |
6967 jcc(Assembler::equal, LENGTH_DIFF_LABEL); |
6743 |
6968 |
6744 // Check if the strings start at the same location. |
6969 // Check if the strings start at the same location and setup scale and stride |
6745 cmpptr(str1, str2); |
6970 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { |
6746 jcc(Assembler::equal, LENGTH_DIFF_LABEL); |
6971 cmpptr(str1, str2); |
6747 |
6972 jcc(Assembler::equal, LENGTH_DIFF_LABEL); |
6748 Address::ScaleFactor scale = Address::times_2; |
6973 if (ae == StrIntrinsicNode::LL) { |
6749 int stride = 8; |
6974 scale = Address::times_1; |
|
6975 stride = 16; |
|
6976 } else { |
|
6977 scale = Address::times_2; |
|
6978 stride = 8; |
|
6979 } |
|
6980 } else { |
|
6981 scale1 = Address::times_1; |
|
6982 scale2 = Address::times_2; |
|
6983 stride = 8; |
|
6984 } |
6750 |
6985 |
6751 if (UseAVX >= 2 && UseSSE42Intrinsics) { |
6986 if (UseAVX >= 2 && UseSSE42Intrinsics) { |
6752 Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_WIDE_TAIL, COMPARE_SMALL_STR; |
6987 Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_WIDE_TAIL, COMPARE_SMALL_STR; |
6753 Label COMPARE_WIDE_VECTORS_LOOP, COMPARE_16_CHARS, COMPARE_INDEX_CHAR; |
6988 Label COMPARE_WIDE_VECTORS_LOOP, COMPARE_16_CHARS, COMPARE_INDEX_CHAR; |
6754 Label COMPARE_TAIL_LONG; |
6989 Label COMPARE_TAIL_LONG; |
6755 int pcmpmask = 0x19; |
6990 int pcmpmask = 0x19; |
|
6991 if (ae == StrIntrinsicNode::LL) { |
|
6992 pcmpmask &= ~0x01; |
|
6993 } |
6756 |
6994 |
6757 // Setup to compare 16-chars (32-bytes) vectors, |
6995 // Setup to compare 16-chars (32-bytes) vectors, |
6758 // start from first character again because it has aligned address. |
6996 // start from first character again because it has aligned address. |
6759 int stride2 = 16; |
6997 if (ae == StrIntrinsicNode::LL) { |
6760 int adr_stride = stride << scale; |
6998 stride2 = 32; |
|
6999 } else { |
|
7000 stride2 = 16; |
|
7001 } |
|
7002 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { |
|
7003 adr_stride = stride << scale; |
|
7004 } else { |
|
7005 adr_stride1 = 8; //stride << scale1; |
|
7006 adr_stride2 = 16; //stride << scale2; |
|
7007 } |
6761 |
7008 |
6762 assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri"); |
7009 assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri"); |
6763 // rax and rdx are used by pcmpestri as elements counters |
7010 // rax and rdx are used by pcmpestri as elements counters |
6764 movl(result, cnt2); |
7011 movl(result, cnt2); |
6765 andl(cnt2, ~(stride2-1)); // cnt2 holds the vector count |
7012 andl(cnt2, ~(stride2-1)); // cnt2 holds the vector count |
6766 jcc(Assembler::zero, COMPARE_TAIL_LONG); |
7013 jcc(Assembler::zero, COMPARE_TAIL_LONG); |
6767 |
7014 |
6768 // fast path : compare first 2 8-char vectors. |
7015 // fast path : compare first 2 8-char vectors. |
6769 bind(COMPARE_16_CHARS); |
7016 bind(COMPARE_16_CHARS); |
6770 movdqu(vec1, Address(str1, 0)); |
7017 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { |
|
7018 movdqu(vec1, Address(str1, 0)); |
|
7019 } else { |
|
7020 pmovzxbw(vec1, Address(str1, 0)); |
|
7021 } |
6771 pcmpestri(vec1, Address(str2, 0), pcmpmask); |
7022 pcmpestri(vec1, Address(str2, 0), pcmpmask); |
6772 jccb(Assembler::below, COMPARE_INDEX_CHAR); |
7023 jccb(Assembler::below, COMPARE_INDEX_CHAR); |
6773 |
7024 |
6774 movdqu(vec1, Address(str1, adr_stride)); |
7025 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { |
6775 pcmpestri(vec1, Address(str2, adr_stride), pcmpmask); |
7026 movdqu(vec1, Address(str1, adr_stride)); |
|
7027 pcmpestri(vec1, Address(str2, adr_stride), pcmpmask); |
|
7028 } else { |
|
7029 pmovzxbw(vec1, Address(str1, adr_stride1)); |
|
7030 pcmpestri(vec1, Address(str2, adr_stride2), pcmpmask); |
|
7031 } |
6776 jccb(Assembler::aboveEqual, COMPARE_WIDE_VECTORS); |
7032 jccb(Assembler::aboveEqual, COMPARE_WIDE_VECTORS); |
6777 addl(cnt1, stride); |
7033 addl(cnt1, stride); |
6778 |
7034 |
6779 // Compare the characters at index in cnt1 |
7035 // Compare the characters at index in cnt1 |
6780 bind(COMPARE_INDEX_CHAR); //cnt1 has the offset of the mismatching character |
7036 bind(COMPARE_INDEX_CHAR); // cnt1 has the offset of the mismatching character |
6781 load_unsigned_short(result, Address(str1, cnt1, scale)); |
7037 load_next_elements(result, cnt2, str1, str2, scale, scale1, scale2, cnt1, ae); |
6782 load_unsigned_short(cnt2, Address(str2, cnt1, scale)); |
|
6783 subl(result, cnt2); |
7038 subl(result, cnt2); |
6784 jmp(POP_LABEL); |
7039 jmp(POP_LABEL); |
6785 |
7040 |
6786 // Setup the registers to start vector comparison loop |
7041 // Setup the registers to start vector comparison loop |
6787 bind(COMPARE_WIDE_VECTORS); |
7042 bind(COMPARE_WIDE_VECTORS); |
6788 lea(str1, Address(str1, result, scale)); |
7043 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { |
6789 lea(str2, Address(str2, result, scale)); |
7044 lea(str1, Address(str1, result, scale)); |
|
7045 lea(str2, Address(str2, result, scale)); |
|
7046 } else { |
|
7047 lea(str1, Address(str1, result, scale1)); |
|
7048 lea(str2, Address(str2, result, scale2)); |
|
7049 } |
6790 subl(result, stride2); |
7050 subl(result, stride2); |
6791 subl(cnt2, stride2); |
7051 subl(cnt2, stride2); |
6792 jccb(Assembler::zero, COMPARE_WIDE_TAIL); |
7052 jccb(Assembler::zero, COMPARE_WIDE_TAIL); |
6793 negptr(result); |
7053 negptr(result); |
6794 |
7054 |
6795 // In a loop, compare 16-chars (32-bytes) at once using (vpxor+vptest) |
7055 // In a loop, compare 16-chars (32-bytes) at once using (vpxor+vptest) |
6796 bind(COMPARE_WIDE_VECTORS_LOOP); |
7056 bind(COMPARE_WIDE_VECTORS_LOOP); |
6797 vmovdqu(vec1, Address(str1, result, scale)); |
7057 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { |
6798 vpxor(vec1, Address(str2, result, scale)); |
7058 vmovdqu(vec1, Address(str1, result, scale)); |
|
7059 vpxor(vec1, Address(str2, result, scale)); |
|
7060 } else { |
|
7061 vpmovzxbw(vec1, Address(str1, result, scale1)); |
|
7062 vpxor(vec1, Address(str2, result, scale2)); |
|
7063 } |
6799 vptest(vec1, vec1); |
7064 vptest(vec1, vec1); |
6800 jccb(Assembler::notZero, VECTOR_NOT_EQUAL); |
7065 jccb(Assembler::notZero, VECTOR_NOT_EQUAL); |
6801 addptr(result, stride2); |
7066 addptr(result, stride2); |
6802 subl(cnt2, stride2); |
7067 subl(cnt2, stride2); |
6803 jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP); |
7068 jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP); |
6816 |
7081 |
6817 // Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors. |
7082 // Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors. |
6818 bind(VECTOR_NOT_EQUAL); |
7083 bind(VECTOR_NOT_EQUAL); |
6819 // clean upper bits of YMM registers |
7084 // clean upper bits of YMM registers |
6820 vpxor(vec1, vec1); |
7085 vpxor(vec1, vec1); |
6821 lea(str1, Address(str1, result, scale)); |
7086 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { |
6822 lea(str2, Address(str2, result, scale)); |
7087 lea(str1, Address(str1, result, scale)); |
|
7088 lea(str2, Address(str2, result, scale)); |
|
7089 } else { |
|
7090 lea(str1, Address(str1, result, scale1)); |
|
7091 lea(str2, Address(str2, result, scale2)); |
|
7092 } |
6823 jmp(COMPARE_16_CHARS); |
7093 jmp(COMPARE_16_CHARS); |
6824 |
7094 |
6825 // Compare tail chars, length between 1 to 15 chars |
7095 // Compare tail chars, length between 1 to 15 chars |
6826 bind(COMPARE_TAIL_LONG); |
7096 bind(COMPARE_TAIL_LONG); |
6827 movl(cnt2, result); |
7097 movl(cnt2, result); |
6828 cmpl(cnt2, stride); |
7098 cmpl(cnt2, stride); |
6829 jccb(Assembler::less, COMPARE_SMALL_STR); |
7099 jccb(Assembler::less, COMPARE_SMALL_STR); |
6830 |
7100 |
6831 movdqu(vec1, Address(str1, 0)); |
7101 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { |
|
7102 movdqu(vec1, Address(str1, 0)); |
|
7103 } else { |
|
7104 pmovzxbw(vec1, Address(str1, 0)); |
|
7105 } |
6832 pcmpestri(vec1, Address(str2, 0), pcmpmask); |
7106 pcmpestri(vec1, Address(str2, 0), pcmpmask); |
6833 jcc(Assembler::below, COMPARE_INDEX_CHAR); |
7107 jcc(Assembler::below, COMPARE_INDEX_CHAR); |
6834 subptr(cnt2, stride); |
7108 subptr(cnt2, stride); |
6835 jccb(Assembler::zero, LENGTH_DIFF_LABEL); |
7109 jccb(Assembler::zero, LENGTH_DIFF_LABEL); |
6836 lea(str1, Address(str1, result, scale)); |
7110 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { |
6837 lea(str2, Address(str2, result, scale)); |
7111 lea(str1, Address(str1, result, scale)); |
|
7112 lea(str2, Address(str2, result, scale)); |
|
7113 } else { |
|
7114 lea(str1, Address(str1, result, scale1)); |
|
7115 lea(str2, Address(str2, result, scale2)); |
|
7116 } |
6838 negptr(cnt2); |
7117 negptr(cnt2); |
6839 jmpb(WHILE_HEAD_LABEL); |
7118 jmpb(WHILE_HEAD_LABEL); |
6840 |
7119 |
6841 bind(COMPARE_SMALL_STR); |
7120 bind(COMPARE_SMALL_STR); |
6842 } else if (UseSSE42Intrinsics) { |
7121 } else if (UseSSE42Intrinsics) { |
6879 jccb(Assembler::zero, LENGTH_DIFF_LABEL); |
7170 jccb(Assembler::zero, LENGTH_DIFF_LABEL); |
6880 |
7171 |
6881 movl(cnt2, stride); |
7172 movl(cnt2, stride); |
6882 movl(result, stride); |
7173 movl(result, stride); |
6883 negptr(result); |
7174 negptr(result); |
6884 movdqu(vec1, Address(str1, result, scale)); |
7175 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { |
6885 pcmpestri(vec1, Address(str2, result, scale), pcmpmask); |
7176 movdqu(vec1, Address(str1, result, scale)); |
|
7177 pcmpestri(vec1, Address(str2, result, scale), pcmpmask); |
|
7178 } else { |
|
7179 pmovzxbw(vec1, Address(str1, result, scale1)); |
|
7180 pcmpestri(vec1, Address(str2, result, scale2), pcmpmask); |
|
7181 } |
6886 jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL); |
7182 jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL); |
6887 |
7183 |
6888 // Mismatched characters in the vectors |
7184 // Mismatched characters in the vectors |
6889 bind(VECTOR_NOT_EQUAL); |
7185 bind(VECTOR_NOT_EQUAL); |
6890 addptr(cnt1, result); |
7186 addptr(cnt1, result); |
6891 load_unsigned_short(result, Address(str1, cnt1, scale)); |
7187 load_next_elements(result, cnt2, str1, str2, scale, scale1, scale2, cnt1, ae); |
6892 load_unsigned_short(cnt2, Address(str2, cnt1, scale)); |
|
6893 subl(result, cnt2); |
7188 subl(result, cnt2); |
6894 jmpb(POP_LABEL); |
7189 jmpb(POP_LABEL); |
6895 |
7190 |
6896 bind(COMPARE_TAIL); // limit is zero |
7191 bind(COMPARE_TAIL); // limit is zero |
6897 movl(cnt2, result); |
7192 movl(cnt2, result); |
6898 // Fallthru to tail compare |
7193 // Fallthru to tail compare |
6899 } |
7194 } |
6900 // Shift str2 and str1 to the end of the arrays, negate min |
7195 // Shift str2 and str1 to the end of the arrays, negate min |
6901 lea(str1, Address(str1, cnt2, scale)); |
7196 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { |
6902 lea(str2, Address(str2, cnt2, scale)); |
7197 lea(str1, Address(str1, cnt2, scale)); |
|
7198 lea(str2, Address(str2, cnt2, scale)); |
|
7199 } else { |
|
7200 lea(str1, Address(str1, cnt2, scale1)); |
|
7201 lea(str2, Address(str2, cnt2, scale2)); |
|
7202 } |
6903 decrementl(cnt2); // first character was compared already |
7203 decrementl(cnt2); // first character was compared already |
6904 negptr(cnt2); |
7204 negptr(cnt2); |
6905 |
7205 |
6906 // Compare the rest of the elements |
7206 // Compare the rest of the elements |
6907 bind(WHILE_HEAD_LABEL); |
7207 bind(WHILE_HEAD_LABEL); |
6908 load_unsigned_short(result, Address(str1, cnt2, scale, 0)); |
7208 load_next_elements(result, cnt1, str1, str2, scale, scale1, scale2, cnt2, ae); |
6909 load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0)); |
|
6910 subl(result, cnt1); |
7209 subl(result, cnt1); |
6911 jccb(Assembler::notZero, POP_LABEL); |
7210 jccb(Assembler::notZero, POP_LABEL); |
6912 increment(cnt2); |
7211 increment(cnt2); |
6913 jccb(Assembler::notZero, WHILE_HEAD_LABEL); |
7212 jccb(Assembler::notZero, WHILE_HEAD_LABEL); |
6914 |
7213 |
6915 // Strings are equal up to min length. Return the length difference. |
7214 // Strings are equal up to min length. Return the length difference. |
6916 bind(LENGTH_DIFF_LABEL); |
7215 bind(LENGTH_DIFF_LABEL); |
6917 pop(result); |
7216 pop(result); |
|
7217 if (ae == StrIntrinsicNode::UU) { |
|
7218 // Divide diff by 2 to get number of chars |
|
7219 sarl(result, 1); |
|
7220 } |
6918 jmpb(DONE_LABEL); |
7221 jmpb(DONE_LABEL); |
6919 |
7222 |
6920 // Discard the stored length difference |
7223 // Discard the stored length difference |
6921 bind(POP_LABEL); |
7224 bind(POP_LABEL); |
6922 pop(cnt1); |
7225 pop(cnt1); |
6923 |
7226 |
6924 // That's it |
7227 // That's it |
6925 bind(DONE_LABEL); |
7228 bind(DONE_LABEL); |
6926 } |
7229 if(ae == StrIntrinsicNode::UL) { |
6927 |
7230 negl(result); |
6928 // Compare char[] arrays aligned to 4 bytes or substrings. |
7231 } |
6929 void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2, |
7232 } |
6930 Register limit, Register result, Register chr, |
7233 |
6931 XMMRegister vec1, XMMRegister vec2) { |
7234 // Search for Non-ASCII character (Negative byte value) in a byte array, |

7235 // return true if it has any and false otherwise. |

// Emits code that scans the `len` bytes starting at ary1 and leaves 1 in
// `result` if any byte has its sign bit (0x80) set, 0 otherwise (a zero
// length yields 0).
// Strategy: with UseAVX >= 2 the bulk is tested 32 bytes at a time, else with
// UseSSE42Intrinsics 16 bytes at a time. Once at least one full vector has
// been processed, a non-empty remainder is covered by a single overlapping
// vector load that ends at the end of the array. Inputs shorter than one
// vector (or configurations with neither feature) fall through to a 4-byte
// loop followed by a 2-byte and a 1-byte tail check.
// ary1, len and tmp1 may be modified; vec1/vec2 are scratch for the vector paths.
7236 void MacroAssembler::has_negatives(Register ary1, Register len, |

7237 Register result, Register tmp1, |

7238 XMMRegister vec1, XMMRegister vec2) { |

7239 |

7240 // rsi: byte array |

7241 // rcx: len |

7242 // rax: result |
6932 ShortBranchVerifier sbv(this); |
7243 ShortBranchVerifier sbv(this); |
6933 Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR; |
7244 assert_different_registers(ary1, len, result, tmp1); |

7245 assert_different_registers(vec1, vec2); |

7246 Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_CHAR, COMPARE_VECTORS, COMPARE_BYTE; |

7247 |

7248 // len == 0 |

7249 testl(len, len); |

7250 jcc(Assembler::zero, FALSE_LABEL); |

7251 |

7252 movl(result, len); // copy |

7253 |

7254 if (UseAVX >= 2) { |

7255 // With AVX2, use 32-byte vector compare |

7256 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; |

7257 |

7258 // Compare 32-byte vectors |

7259 andl(result, 0x0000001f); // tail count (in bytes) |

7260 andl(len, 0xffffffe0); // vector count (in bytes) |

// the branch consumes the zero flag produced by the andl on len just above
7261 jccb(Assembler::zero, COMPARE_TAIL); |

7262 |

// point ary1 past the vectorized part and count len up from -count to zero
7263 lea(ary1, Address(ary1, len, Address::times_1)); |

7264 negptr(len); |

7265 |

7266 movl(tmp1, 0x80808080); // create mask to test the sign bit of each byte in vector |

7267 movdl(vec2, tmp1); |

7268 vpbroadcastd(vec2, vec2); |

7269 |

7270 bind(COMPARE_WIDE_VECTORS); |

7271 vmovdqu(vec1, Address(ary1, len, Address::times_1)); |

7272 vptest(vec1, vec2); |

7273 jccb(Assembler::notZero, TRUE_LABEL); |

7274 addptr(len, 32); |

7275 jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); |

7276 |

7277 testl(result, result); |

7278 jccb(Assembler::zero, FALSE_LABEL); |

7279 |

// remainder: re-test the last 32 bytes ending at ary1 + tail count; this
// overlaps bytes the loop has already checked
7280 vmovdqu(vec1, Address(ary1, result, Address::times_1, -32)); |

7281 vptest(vec1, vec2); |

7282 jccb(Assembler::notZero, TRUE_LABEL); |

7283 jmpb(FALSE_LABEL); |

7284 |

7285 bind(COMPARE_TAIL); // len is zero |

7286 movl(len, result); |

7287 // Fallthru to tail compare |

7288 } else if (UseSSE42Intrinsics) { |

7289 // With SSE4.2, use double quad vector compare |

7290 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; |

7291 |

7292 // Compare 16-byte vectors |

7293 andl(result, 0x0000000f); // tail count (in bytes) |

7294 andl(len, 0xfffffff0); // vector count (in bytes) |

7295 jccb(Assembler::zero, COMPARE_TAIL); |

7296 |

7297 lea(ary1, Address(ary1, len, Address::times_1)); |

7298 negptr(len); |

7299 |

// same sign-bit mask as the AVX2 path, replicated across the 16-byte register
7300 movl(tmp1, 0x80808080); |

7301 movdl(vec2, tmp1); |

7302 pshufd(vec2, vec2, 0); |

7303 |

7304 bind(COMPARE_WIDE_VECTORS); |

7305 movdqu(vec1, Address(ary1, len, Address::times_1)); |

7306 ptest(vec1, vec2); |

7307 jccb(Assembler::notZero, TRUE_LABEL); |

7308 addptr(len, 16); |

7309 jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); |

7310 |

7311 testl(result, result); |

7312 jccb(Assembler::zero, FALSE_LABEL); |

7313 |

// remainder: re-test the last 16 bytes ending at ary1 + tail count (overlapping load)
7314 movdqu(vec1, Address(ary1, result, Address::times_1, -16)); |

7315 ptest(vec1, vec2); |

7316 jccb(Assembler::notZero, TRUE_LABEL); |

7317 jmpb(FALSE_LABEL); |

7318 |

7319 bind(COMPARE_TAIL); // len is zero |

7320 movl(len, result); |

7321 // Fallthru to tail compare |

7322 } |

7323 |

7324 // Compare 4-byte vectors |

7325 andl(len, 0xfffffffc); // vector count (in bytes) |

7326 jccb(Assembler::zero, COMPARE_CHAR); |

7327 |

7328 lea(ary1, Address(ary1, len, Address::times_1)); |

7329 negptr(len); |

7330 |

7331 bind(COMPARE_VECTORS); |

7332 movl(tmp1, Address(ary1, len, Address::times_1)); |

7333 andl(tmp1, 0x80808080); |

7334 jccb(Assembler::notZero, TRUE_LABEL); |

7335 addptr(len, 4); |

7336 jcc(Assembler::notZero, COMPARE_VECTORS); |

7337 |

7338 // Compare trailing char (final 2 bytes), if any |

7339 bind(COMPARE_CHAR); |

7340 testl(result, 0x2); // tail char |

7341 jccb(Assembler::zero, COMPARE_BYTE); |

7342 load_unsigned_short(tmp1, Address(ary1, 0)); |

7343 andl(tmp1, 0x00008080); |

7344 jccb(Assembler::notZero, TRUE_LABEL); |

7345 subptr(result, 2); |

7346 lea(ary1, Address(ary1, 2)); |

7347 |

7348 bind(COMPARE_BYTE); |

7349 testl(result, 0x1); // tail byte |

7350 jccb(Assembler::zero, FALSE_LABEL); |

7351 load_unsigned_byte(tmp1, Address(ary1, 0)); |

7352 andl(tmp1, 0x00000080); |

7353 jccb(Assembler::notEqual, TRUE_LABEL); |

7354 jmpb(FALSE_LABEL); |

7355 |

7356 bind(TRUE_LABEL); |

7357 movl(result, 1); // return true |

7358 jmpb(DONE); |

7359 |

7360 bind(FALSE_LABEL); |

7361 xorl(result, result); // return false |

7362 |

7363 // That's it |

7364 bind(DONE); |

7365 if (UseAVX >= 2) { |

7366 // clean upper bits of YMM registers |

7367 vpxor(vec1, vec1); |

7368 vpxor(vec2, vec2); |

7369 } |

7370 } |
|
7371 |
|
7372 // Compare char[] or byte[] arrays aligned to 4 bytes or substrings. |
|
7373 void MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register ary2, |
|
7374 Register limit, Register result, Register chr, |
|
7375 XMMRegister vec1, XMMRegister vec2, bool is_char) { |
|
7376 ShortBranchVerifier sbv(this); |
|
7377 Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR, COMPARE_BYTE; |
6934 |
7378 |
6935 int length_offset = arrayOopDesc::length_offset_in_bytes(); |
7379 int length_offset = arrayOopDesc::length_offset_in_bytes(); |
6936 int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); |
7380 int base_offset = arrayOopDesc::base_offset_in_bytes(is_char ? T_CHAR : T_BYTE); |
6937 |
|
6938 // Check the input args |
|
6939 cmpptr(ary1, ary2); |
|
6940 jcc(Assembler::equal, TRUE_LABEL); |
|
6941 |
7381 |
6942 if (is_array_equ) { |
7382 if (is_array_equ) { |
|
7383 // Check the input args |
|
7384 cmpptr(ary1, ary2); |
|
7385 jcc(Assembler::equal, TRUE_LABEL); |
|
7386 |
6943 // Need additional checks for arrays_equals. |
7387 // Need additional checks for arrays_equals. |
6944 testptr(ary1, ary1); |
7388 testptr(ary1, ary1); |
6945 jcc(Assembler::zero, FALSE_LABEL); |
7389 jcc(Assembler::zero, FALSE_LABEL); |
6946 testptr(ary2, ary2); |
7390 testptr(ary2, ary2); |
6947 jcc(Assembler::zero, FALSE_LABEL); |
7391 jcc(Assembler::zero, FALSE_LABEL); |
9083 #endif // LP64 |
9546 #endif // LP64 |
9084 #undef BIND |
9547 #undef BIND |
9085 #undef BLOCK_COMMENT |
9548 #undef BLOCK_COMMENT |
9086 |
9549 |
9087 |
9550 |
|
9551 // Compress char[] array to byte[]. |

// Emits code that copies `len` 16-bit chars from src to dst, storing only the
// low byte of each. On success `result` holds the original length (saved on
// the stack on entry); if any char has a non-zero upper byte, `result` is 0.
// Bytes stored before such a char is detected remain in dst.
// With UseSSE42Intrinsics the copy proceeds 16 chars per iteration, then one
// optional group of 8 chars, then one char at a time; without it only the
// one-char loop is emitted.
// src, dst, len, tmp5 and the XMM temporaries may be modified.
9552 void MacroAssembler::char_array_compress(Register src, Register dst, Register len, |

9553 XMMRegister tmp1Reg, XMMRegister tmp2Reg, |

9554 XMMRegister tmp3Reg, XMMRegister tmp4Reg, |

9555 Register tmp5, Register result) { |

9556 Label copy_chars_loop, return_length, return_zero, done; |

9557 |

9558 // rsi: src |

9559 // rdi: dst |

9560 // rdx: len |

9561 // rcx: tmp5 |

9562 // rax: result |

9563 |

9564 // rsi holds start addr of source char[] to be compressed |

9565 // rdi holds start addr of destination byte[] |

9566 // rdx holds length |

9567 |

9568 assert(len != result, ""); |

9569 |

9570 // save length for return |

9571 push(len); |

9572 |

9573 if (UseSSE42Intrinsics) { |

9574 Label copy_32_loop, copy_16, copy_tail; |

9575 |

9576 movl(result, len); |

9577 movl(tmp5, 0xff00ff00); // create mask to test for Unicode chars (upper byte set) in vectors |

9578 |

9579 // vectored compression |

9580 andl(len, 0xfffffff0); // vector count (in chars) |

9581 andl(result, 0x0000000f); // tail count (in chars) |

9582 testl(len, len); |

9583 jccb(Assembler::zero, copy_16); |

9584 |

9585 // compress 16 chars per iter |

9586 movdl(tmp1Reg, tmp5); |

9587 pshufd(tmp1Reg, tmp1Reg, 0); // store Unicode mask in tmp1Reg |

9588 pxor(tmp4Reg, tmp4Reg); |

9589 |

// point src/dst past the vectorized part and count len up from -count to zero
9590 lea(src, Address(src, len, Address::times_2)); |

9591 lea(dst, Address(dst, len, Address::times_1)); |

9592 negptr(len); |

9593 |

9594 bind(copy_32_loop); |

9595 movdqu(tmp2Reg, Address(src, len, Address::times_2)); // load 1st 8 characters |

// tmp4Reg accumulates the OR of every vector loaded so far
9596 por(tmp4Reg, tmp2Reg); |

9597 movdqu(tmp3Reg, Address(src, len, Address::times_2, 16)); // load next 8 characters |

9598 por(tmp4Reg, tmp3Reg); |

9599 ptest(tmp4Reg, tmp1Reg); // check for Unicode chars in next vector |

9600 jcc(Assembler::notZero, return_zero); |

9601 packuswb(tmp2Reg, tmp3Reg); // only LATIN1 chars; compress each to 1 byte |

9602 movdqu(Address(dst, len, Address::times_1), tmp2Reg); |

9603 addptr(len, 16); |

9604 jcc(Assembler::notZero, copy_32_loop); |

9605 |

9606 // compress next vector of 8 chars (if any) |

9607 bind(copy_16); |

9608 movl(len, result); |

9609 andl(len, 0xfffffff8); // vector count (in chars) |

9610 andl(result, 0x00000007); // tail count (in chars) |

9611 testl(len, len); |

9612 jccb(Assembler::zero, copy_tail); |

9613 |

// rebuild the mask here: this point is also reached when the 16-char setup above was skipped
9614 movdl(tmp1Reg, tmp5); |

9615 pshufd(tmp1Reg, tmp1Reg, 0); // store Unicode mask in tmp1Reg |

9616 pxor(tmp3Reg, tmp3Reg); |

9617 |

9618 movdqu(tmp2Reg, Address(src, 0)); |

9619 ptest(tmp2Reg, tmp1Reg); // check for Unicode chars in vector |

9620 jccb(Assembler::notZero, return_zero); |

9621 packuswb(tmp2Reg, tmp3Reg); // only LATIN1 chars; compress each to 1 byte |

// only the low 8 bytes hold compressed chars (tmp3Reg was zero), so store 8 bytes
9622 movq(Address(dst, 0), tmp2Reg); |

9623 addptr(src, 16); |

9624 addptr(dst, 8); |

9625 |

9626 bind(copy_tail); |

9627 movl(len, result); |

9628 } |

9629 // compress 1 char per iter |

9630 testl(len, len); |

9631 jccb(Assembler::zero, return_length); |

9632 lea(src, Address(src, len, Address::times_2)); |

9633 lea(dst, Address(dst, len, Address::times_1)); |

9634 negptr(len); |

9635 |

9636 bind(copy_chars_loop); |

9637 load_unsigned_short(result, Address(src, len, Address::times_2)); |

9638 testl(result, 0xff00); // check if Unicode char |

9639 jccb(Assembler::notZero, return_zero); |

9640 movb(Address(dst, len, Address::times_1), result); // LATIN1 char; compress to 1 byte |

9641 increment(len); |

9642 jcc(Assembler::notZero, copy_chars_loop); |

9643 |

9644 // if compression succeeded, return length |

9645 bind(return_length); |

9646 pop(result); |

9647 jmpb(done); |

9648 |

9649 // if compression failed, return 0 |

9650 bind(return_zero); |

9651 xorl(result, result); |

// discard the length pushed on entry without loading it
9652 addptr(rsp, wordSize); |

9653 |

9654 bind(done); |

9655 } |
|
9656 |
|
9657 // Inflate byte[] array to char[]. |

// Emits code that copies `len` bytes from src to dst, zero-extending each
// byte to a 16-bit char. No result register is produced.
// With UseSSE42Intrinsics the copy proceeds 8 bytes per iteration, then one
// optional group of 4 bytes, then one byte at a time; without it only the
// one-byte loop is emitted.
// src, dst, len, tmp1 and tmp2 may be modified.
9658 void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len, |

9659 XMMRegister tmp1, Register tmp2) { |

9660 Label copy_chars_loop, done; |

9661 |

9662 // rsi: src |

9663 // rdi: dst |

9664 // rdx: len |

9665 // rcx: tmp2 |

9666 |

9667 // rsi holds start addr of source byte[] to be inflated |

9668 // rdi holds start addr of destination char[] |

9669 // rdx holds length |

9670 assert_different_registers(src, dst, len, tmp2); |

9671 |

9672 if (UseSSE42Intrinsics) { |

9673 Label copy_8_loop, copy_bytes, copy_tail; |

9674 |

9675 movl(tmp2, len); |

9676 andl(tmp2, 0x00000007); // tail count (in chars) |

9677 andl(len, 0xfffffff8); // vector count (in chars) |

// the branch consumes the zero flag produced by the andl on len just above
9678 jccb(Assembler::zero, copy_tail); |

9679 |

9680 // vectored inflation |

// point src/dst past the vectorized part and count len up from -count to zero
9681 lea(src, Address(src, len, Address::times_1)); |

9682 lea(dst, Address(dst, len, Address::times_2)); |

9683 negptr(len); |

9684 |

9685 // inflate 8 chars per iter |

9686 bind(copy_8_loop); |

9687 pmovzxbw(tmp1, Address(src, len, Address::times_1)); // unpack to 8 words |

9688 movdqu(Address(dst, len, Address::times_2), tmp1); |

9689 addptr(len, 8); |

9690 jcc(Assembler::notZero, copy_8_loop); |

9691 |

9692 bind(copy_tail); |

9693 movl(len, tmp2); |

9694 |

9695 cmpl(len, 4); |

9696 jccb(Assembler::less, copy_bytes); |

9697 |

9698 movdl(tmp1, Address(src, 0)); // load 4 byte chars |

9699 pmovzxbw(tmp1, tmp1); |

// the 4 inflated chars occupy the low 8 bytes of tmp1
9700 movq(Address(dst, 0), tmp1); |

9701 subptr(len, 4); |

9702 addptr(src, 4); |

9703 addptr(dst, 8); |

9704 |

9705 bind(copy_bytes); |

9706 } |

9707 testl(len, len); |

9708 jccb(Assembler::zero, done); |

9709 lea(src, Address(src, len, Address::times_1)); |

9710 lea(dst, Address(dst, len, Address::times_2)); |

9711 negptr(len); |

9712 |

9713 // inflate 1 char per iter |

9714 bind(copy_chars_loop); |

9715 load_unsigned_byte(tmp2, Address(src, len, Address::times_1)); // load byte char |

9716 movw(Address(dst, len, Address::times_2), tmp2); // inflate byte char to word |

9717 increment(len); |

9718 jcc(Assembler::notZero, copy_chars_loop); |

9719 |

9720 bind(done); |

9721 } |
|
9722 |
|
9723 |
9088 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) { |
9724 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) { |
9089 switch (cond) { |
9725 switch (cond) { |
9090 // Note some conditions are synonyms for others |
9726 // Note some conditions are synonyms for others |
9091 case Assembler::zero: return Assembler::notZero; |
9727 case Assembler::zero: return Assembler::notZero; |
9092 case Assembler::notZero: return Assembler::zero; |
9728 case Assembler::notZero: return Assembler::zero; |