hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp
changeset 33628 09241459a8b8
parent 33198 b37ad9fbf681
child 34162 16b54851eaf6
child 33639 e672d407c0d9
equal deleted inserted replaced
33627:c5b7455f846e 33628:09241459a8b8
    44 #include "gc/g1/g1CollectedHeap.inline.hpp"
    44 #include "gc/g1/g1CollectedHeap.inline.hpp"
    45 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
    45 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
    46 #include "gc/g1/heapRegion.hpp"
    46 #include "gc/g1/heapRegion.hpp"
    47 #endif // INCLUDE_ALL_GCS
    47 #endif // INCLUDE_ALL_GCS
    48 #include "crc32c.h"
    48 #include "crc32c.h"
       
    49 #ifdef COMPILER2
       
    50 #include "opto/intrinsicnode.hpp"
       
    51 #endif
    49 
    52 
    50 #ifdef PRODUCT
    53 #ifdef PRODUCT
    51 #define BLOCK_COMMENT(str) /* nothing */
    54 #define BLOCK_COMMENT(str) /* nothing */
    52 #define STOP(error) stop(error)
    55 #define STOP(error) stop(error)
    53 #else
    56 #else
  6297     NOT_LP64(shlptr(cnt,1);) // convert to number of dwords for 32-bit VM
  6300     NOT_LP64(shlptr(cnt,1);) // convert to number of dwords for 32-bit VM
  6298     rep_stos();
  6301     rep_stos();
  6299   }
  6302   }
  6300 }
  6303 }
  6301 
  6304 
       
  6305 #ifdef COMPILER2
       
  6306 
  6302 // IndexOf for constant substrings with size >= 8 chars
  6307 // IndexOf for constant substrings with size >= 8 chars
  6303 // which don't need to be loaded through stack.
  6308 // which don't need to be loaded through stack.
  6304 void MacroAssembler::string_indexofC8(Register str1, Register str2,
  6309 void MacroAssembler::string_indexofC8(Register str1, Register str2,
  6305                                       Register cnt1, Register cnt2,
  6310                                       Register cnt1, Register cnt2,
  6306                                       int int_cnt2,  Register result,
  6311                                       int int_cnt2,  Register result,
  6307                                       XMMRegister vec, Register tmp) {
  6312                                       XMMRegister vec, Register tmp,
       
  6313                                       int ae) {
  6308   ShortBranchVerifier sbv(this);
  6314   ShortBranchVerifier sbv(this);
  6309   assert(UseSSE42Intrinsics, "SSE4.2 is required");
  6315   assert(UseSSE42Intrinsics, "SSE4.2 is required");
  6310 
  6316   assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
  6311   // This method uses pcmpestri instruction with bound registers
  6317 
       
  6318   // This method uses the pcmpestri instruction with bound registers
  6312   //   inputs:
  6319   //   inputs:
  6313   //     xmm - substring
  6320   //     xmm - substring
  6314   //     rax - substring length (elements count)
  6321   //     rax - substring length (elements count)
  6315   //     mem - scanned string
  6322   //     mem - scanned string
  6316   //     rdx - string length (elements count)
  6323   //     rdx - string length (elements count)
  6317   //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
  6324   //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
       
  6325   //     0xc - mode: 1100 (substring search) + 00 (unsigned bytes)
  6318   //   outputs:
  6326   //   outputs:
  6319   //     rcx - matched index in string
  6327   //     rcx - matched index in string
  6320   assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
  6328   assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
       
  6329   int mode   = (ae == StrIntrinsicNode::LL) ? 0x0c : 0x0d; // bytes or shorts
       
  6330   int stride = (ae == StrIntrinsicNode::LL) ? 16 : 8; //UU, UL -> 8
       
  6331   Address::ScaleFactor scale1 = (ae == StrIntrinsicNode::LL) ? Address::times_1 : Address::times_2;
       
  6332   Address::ScaleFactor scale2 = (ae == StrIntrinsicNode::UL) ? Address::times_1 : scale1;
  6321 
  6333 
  6322   Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
  6334   Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
  6323         RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,
  6335         RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,
  6324         MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE;
  6336         MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE;
  6325 
  6337 
  6326   // Note, inline_string_indexOf() generates checks:
  6338   // Note, inline_string_indexOf() generates checks:
  6327   // if (substr.count > string.count) return -1;
  6339   // if (substr.count > string.count) return -1;
  6328   // if (substr.count == 0) return 0;
  6340   // if (substr.count == 0) return 0;
  6329   assert(int_cnt2 >= 8, "this code isused only for cnt2 >= 8 chars");
  6341   assert(int_cnt2 >= stride, "this code is used only for cnt2 >= 8 chars");
  6330 
  6342 
  6331   // Load substring.
  6343   // Load substring.
  6332   movdqu(vec, Address(str2, 0));
  6344   if (ae == StrIntrinsicNode::UL) {
       
  6345     pmovzxbw(vec, Address(str2, 0));
       
  6346   } else {
       
  6347     movdqu(vec, Address(str2, 0));
       
  6348   }
  6333   movl(cnt2, int_cnt2);
  6349   movl(cnt2, int_cnt2);
  6334   movptr(result, str1); // string addr
  6350   movptr(result, str1); // string addr
  6335 
  6351 
  6336   if (int_cnt2 > 8) {
  6352   if (int_cnt2 > stride) {
  6337     jmpb(SCAN_TO_SUBSTR);
  6353     jmpb(SCAN_TO_SUBSTR);
  6338 
  6354 
  6339     // Reload substr for rescan, this code
  6355     // Reload substr for rescan, this code
  6340     // is executed only for large substrings (> 8 chars)
  6356     // is executed only for large substrings (> 8 chars)
  6341     bind(RELOAD_SUBSTR);
  6357     bind(RELOAD_SUBSTR);
  6342     movdqu(vec, Address(str2, 0));
  6358     if (ae == StrIntrinsicNode::UL) {
       
  6359       pmovzxbw(vec, Address(str2, 0));
       
  6360     } else {
       
  6361       movdqu(vec, Address(str2, 0));
       
  6362     }
  6343     negptr(cnt2); // Jumped here with negative cnt2, convert to positive
  6363     negptr(cnt2); // Jumped here with negative cnt2, convert to positive
  6344 
  6364 
  6345     bind(RELOAD_STR);
  6365     bind(RELOAD_STR);
  6346     // We came here after the beginning of the substring was
  6366     // We came here after the beginning of the substring was
  6347     // matched but the rest of it was not so we need to search
  6367     // matched but the rest of it was not so we need to search
  6356 
  6376 
  6357     decrementl(cnt1);     // Shift to next element
  6377     decrementl(cnt1);     // Shift to next element
  6358     cmpl(cnt1, cnt2);
  6378     cmpl(cnt1, cnt2);
  6359     jccb(Assembler::negative, RET_NOT_FOUND);  // Left less then substring
  6379     jccb(Assembler::negative, RET_NOT_FOUND);  // Left less then substring
  6360 
  6380 
  6361     addptr(result, 2);
  6381     addptr(result, (1<<scale1));
  6362 
  6382 
  6363   } // (int_cnt2 > 8)
  6383   } // (int_cnt2 > 8)
  6364 
  6384 
  6365   // Scan string for start of substr in 16-byte vectors
  6385   // Scan string for start of substr in 16-byte vectors
  6366   bind(SCAN_TO_SUBSTR);
  6386   bind(SCAN_TO_SUBSTR);
  6367   pcmpestri(vec, Address(result, 0), 0x0d);
  6387   pcmpestri(vec, Address(result, 0), mode);
  6368   jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
  6388   jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
  6369   subl(cnt1, 8);
  6389   subl(cnt1, stride);
  6370   jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
  6390   jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
  6371   cmpl(cnt1, cnt2);
  6391   cmpl(cnt1, cnt2);
  6372   jccb(Assembler::negative, RET_NOT_FOUND);  // Left less then substring
  6392   jccb(Assembler::negative, RET_NOT_FOUND);  // Left less then substring
  6373   addptr(result, 16);
  6393   addptr(result, 16);
  6374   jmpb(SCAN_TO_SUBSTR);
  6394   jmpb(SCAN_TO_SUBSTR);
  6375 
  6395 
  6376   // Found a potential substr
  6396   // Found a potential substr
  6377   bind(FOUND_CANDIDATE);
  6397   bind(FOUND_CANDIDATE);
  6378   // Matched whole vector if first element matched (tmp(rcx) == 0).
  6398   // Matched whole vector if first element matched (tmp(rcx) == 0).
  6379   if (int_cnt2 == 8) {
  6399   if (int_cnt2 == stride) {
  6380     jccb(Assembler::overflow, RET_FOUND);    // OF == 1
  6400     jccb(Assembler::overflow, RET_FOUND);    // OF == 1
  6381   } else { // int_cnt2 > 8
  6401   } else { // int_cnt2 > 8
  6382     jccb(Assembler::overflow, FOUND_SUBSTR);
  6402     jccb(Assembler::overflow, FOUND_SUBSTR);
  6383   }
  6403   }
  6384   // After pcmpestri tmp(rcx) contains matched element index
  6404   // After pcmpestri tmp(rcx) contains matched element index
  6385   // Compute start addr of substr
  6405   // Compute start addr of substr
  6386   lea(result, Address(result, tmp, Address::times_2));
  6406   lea(result, Address(result, tmp, scale1));
  6387 
  6407 
  6388   // Make sure string is still long enough
  6408   // Make sure string is still long enough
  6389   subl(cnt1, tmp);
  6409   subl(cnt1, tmp);
  6390   cmpl(cnt1, cnt2);
  6410   cmpl(cnt1, cnt2);
  6391   if (int_cnt2 == 8) {
  6411   if (int_cnt2 == stride) {
  6392     jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
  6412     jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
  6393   } else { // int_cnt2 > 8
  6413   } else { // int_cnt2 > 8
  6394     jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD);
  6414     jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD);
  6395   }
  6415   }
  6396   // Left less than substring.
  6416   // Left less than substring.
  6397 
  6417 
  6398   bind(RET_NOT_FOUND);
  6418   bind(RET_NOT_FOUND);
  6399   movl(result, -1);
  6419   movl(result, -1);
  6400   jmpb(EXIT);
  6420   jmpb(EXIT);
  6401 
  6421 
  6402   if (int_cnt2 > 8) {
  6422   if (int_cnt2 > stride) {
  6403     // This code is optimized for the case when whole substring
  6423     // This code is optimized for the case when whole substring
  6404     // is matched if its head is matched.
  6424     // is matched if its head is matched.
  6405     bind(MATCH_SUBSTR_HEAD);
  6425     bind(MATCH_SUBSTR_HEAD);
  6406     pcmpestri(vec, Address(result, 0), 0x0d);
  6426     pcmpestri(vec, Address(result, 0), mode);
  6407     // Reload only string if does not match
  6427     // Reload only string if does not match
  6408     jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0
  6428     jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0
  6409 
  6429 
  6410     Label CONT_SCAN_SUBSTR;
  6430     Label CONT_SCAN_SUBSTR;
  6411     // Compare the rest of substring (> 8 chars).
  6431     // Compare the rest of substring (> 8 chars).
  6412     bind(FOUND_SUBSTR);
  6432     bind(FOUND_SUBSTR);
  6413     // First 8 chars are already matched.
  6433     // First 8 chars are already matched.
  6414     negptr(cnt2);
  6434     negptr(cnt2);
  6415     addptr(cnt2, 8);
  6435     addptr(cnt2, stride);
  6416 
  6436 
  6417     bind(SCAN_SUBSTR);
  6437     bind(SCAN_SUBSTR);
  6418     subl(cnt1, 8);
  6438     subl(cnt1, stride);
  6419     cmpl(cnt2, -8); // Do not read beyond substring
  6439     cmpl(cnt2, -stride); // Do not read beyond substring
  6420     jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR);
  6440     jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR);
  6421     // Back-up strings to avoid reading beyond substring:
  6441     // Back-up strings to avoid reading beyond substring:
  6422     // cnt1 = cnt1 - cnt2 + 8
  6442     // cnt1 = cnt1 - cnt2 + 8
  6423     addl(cnt1, cnt2); // cnt2 is negative
  6443     addl(cnt1, cnt2); // cnt2 is negative
  6424     addl(cnt1, 8);
  6444     addl(cnt1, stride);
  6425     movl(cnt2, 8); negptr(cnt2);
  6445     movl(cnt2, stride); negptr(cnt2);
  6426     bind(CONT_SCAN_SUBSTR);
  6446     bind(CONT_SCAN_SUBSTR);
  6427     if (int_cnt2 < (int)G) {
  6447     if (int_cnt2 < (int)G) {
  6428       movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2));
  6448       int tail_off1 = int_cnt2<<scale1;
  6429       pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d);
  6449       int tail_off2 = int_cnt2<<scale2;
       
  6450       if (ae == StrIntrinsicNode::UL) {
       
  6451         pmovzxbw(vec, Address(str2, cnt2, scale2, tail_off2));
       
  6452       } else {
       
  6453         movdqu(vec, Address(str2, cnt2, scale2, tail_off2));
       
  6454       }
       
  6455       pcmpestri(vec, Address(result, cnt2, scale1, tail_off1), mode);
  6430     } else {
  6456     } else {
  6431       // calculate index in register to avoid integer overflow (int_cnt2*2)
  6457       // calculate index in register to avoid integer overflow (int_cnt2*2)
  6432       movl(tmp, int_cnt2);
  6458       movl(tmp, int_cnt2);
  6433       addptr(tmp, cnt2);
  6459       addptr(tmp, cnt2);
  6434       movdqu(vec, Address(str2, tmp, Address::times_2, 0));
  6460       if (ae == StrIntrinsicNode::UL) {
  6435       pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d);
  6461         pmovzxbw(vec, Address(str2, tmp, scale2, 0));
       
  6462       } else {
       
  6463         movdqu(vec, Address(str2, tmp, scale2, 0));
       
  6464       }
       
  6465       pcmpestri(vec, Address(result, tmp, scale1, 0), mode);
  6436     }
  6466     }
  6437     // Need to reload strings pointers if not matched whole vector
  6467     // Need to reload strings pointers if not matched whole vector
  6438     jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
  6468     jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
  6439     addptr(cnt2, 8);
  6469     addptr(cnt2, stride);
  6440     jcc(Assembler::negative, SCAN_SUBSTR);
  6470     jcc(Assembler::negative, SCAN_SUBSTR);
  6441     // Fall through if found full substring
  6471     // Fall through if found full substring
  6442 
  6472 
  6443   } // (int_cnt2 > 8)
  6473   } // (int_cnt2 > 8)
  6444 
  6474 
  6445   bind(RET_FOUND);
  6475   bind(RET_FOUND);
  6446   // Found result if we matched full small substring.
  6476   // Found result if we matched full small substring.
  6447   // Compute substr offset
  6477   // Compute substr offset
  6448   subptr(result, str1);
  6478   subptr(result, str1);
  6449   shrl(result, 1); // index
  6479   if (ae == StrIntrinsicNode::UU || ae == StrIntrinsicNode::UL) {
       
  6480     shrl(result, 1); // index
       
  6481   }
  6450   bind(EXIT);
  6482   bind(EXIT);
  6451 
  6483 
  6452 } // string_indexofC8
  6484 } // string_indexofC8
  6453 
  6485 
  6454 // Small strings are loaded through stack if they cross page boundary.
  6486 // Small strings are loaded through stack if they cross page boundary.
  6455 void MacroAssembler::string_indexof(Register str1, Register str2,
  6487 void MacroAssembler::string_indexof(Register str1, Register str2,
  6456                                     Register cnt1, Register cnt2,
  6488                                     Register cnt1, Register cnt2,
  6457                                     int int_cnt2,  Register result,
  6489                                     int int_cnt2,  Register result,
  6458                                     XMMRegister vec, Register tmp) {
  6490                                     XMMRegister vec, Register tmp,
       
  6491                                     int ae) {
  6459   ShortBranchVerifier sbv(this);
  6492   ShortBranchVerifier sbv(this);
  6460   assert(UseSSE42Intrinsics, "SSE4.2 is required");
  6493   assert(UseSSE42Intrinsics, "SSE4.2 is required");
       
  6494   assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
       
  6495 
  6461   //
  6496   //
  6462   // int_cnt2 is length of small (< 8 chars) constant substring
  6497   // int_cnt2 is length of small (< 8 chars) constant substring
  6463   // or (-1) for non constant substring in which case its length
  6498   // or (-1) for non constant substring in which case its length
  6464   // is in cnt2 register.
  6499   // is in cnt2 register.
  6465   //
  6500   //
  6466   // Note, inline_string_indexOf() generates checks:
  6501   // Note, inline_string_indexOf() generates checks:
  6467   // if (substr.count > string.count) return -1;
  6502   // if (substr.count > string.count) return -1;
  6468   // if (substr.count == 0) return 0;
  6503   // if (substr.count == 0) return 0;
  6469   //
  6504   //
  6470   assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0");
  6505   int stride = (ae == StrIntrinsicNode::LL) ? 16 : 8; //UU, UL -> 8
  6471 
  6506   assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < stride), "should be != 0");
  6472   // This method uses pcmpestri instruction with bound registers
  6507   // This method uses the pcmpestri instruction with bound registers
  6473   //   inputs:
  6508   //   inputs:
  6474   //     xmm - substring
  6509   //     xmm - substring
  6475   //     rax - substring length (elements count)
  6510   //     rax - substring length (elements count)
  6476   //     mem - scanned string
  6511   //     mem - scanned string
  6477   //     rdx - string length (elements count)
  6512   //     rdx - string length (elements count)
  6478   //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
  6513   //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
       
  6514   //     0xc - mode: 1100 (substring search) + 00 (unsigned bytes)
  6479   //   outputs:
  6515   //   outputs:
  6480   //     rcx - matched index in string
  6516   //     rcx - matched index in string
  6481   assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
  6517   assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
       
  6518   int mode = (ae == StrIntrinsicNode::LL) ? 0x0c : 0x0d; // bytes or shorts
       
  6519   Address::ScaleFactor scale1 = (ae == StrIntrinsicNode::LL) ? Address::times_1 : Address::times_2;
       
  6520   Address::ScaleFactor scale2 = (ae == StrIntrinsicNode::UL) ? Address::times_1 : scale1;
  6482 
  6521 
  6483   Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
  6522   Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
  6484         RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
  6523         RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
  6485         FOUND_CANDIDATE;
  6524         FOUND_CANDIDATE;
  6486 
  6525 
  6490     Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;
  6529     Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;
  6491 
  6530 
  6492     movptr(tmp, rsp); // save old SP
  6531     movptr(tmp, rsp); // save old SP
  6493 
  6532 
  6494     if (int_cnt2 > 0) {     // small (< 8 chars) constant substring
  6533     if (int_cnt2 > 0) {     // small (< 8 chars) constant substring
  6495       if (int_cnt2 == 1) {  // One char
  6534       if (int_cnt2 == (1>>scale2)) { // One byte
       
  6535         assert((ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL), "Only possible for latin1 encoding");
       
  6536         load_unsigned_byte(result, Address(str2, 0));
       
  6537         movdl(vec, result); // move 32 bits
       
  6538       } else if (ae == StrIntrinsicNode::LL && int_cnt2 == 3) {  // Three bytes
       
  6539         // Not enough header space in 32-bit VM: 12+3 = 15.
       
  6540         movl(result, Address(str2, -1));
       
  6541         shrl(result, 8);
       
  6542         movdl(vec, result); // move 32 bits
       
  6543       } else if (ae != StrIntrinsicNode::UL && int_cnt2 == (2>>scale2)) {  // One char
  6496         load_unsigned_short(result, Address(str2, 0));
  6544         load_unsigned_short(result, Address(str2, 0));
  6497         movdl(vec, result); // move 32 bits
  6545         movdl(vec, result); // move 32 bits
  6498       } else if (int_cnt2 == 2) { // Two chars
  6546       } else if (ae != StrIntrinsicNode::UL && int_cnt2 == (4>>scale2)) { // Two chars
  6499         movdl(vec, Address(str2, 0)); // move 32 bits
  6547         movdl(vec, Address(str2, 0)); // move 32 bits
  6500       } else if (int_cnt2 == 4) { // Four chars
  6548       } else if (ae != StrIntrinsicNode::UL && int_cnt2 == (8>>scale2)) { // Four chars
  6501         movq(vec, Address(str2, 0));  // move 64 bits
  6549         movq(vec, Address(str2, 0));  // move 64 bits
  6502       } else { // cnt2 = { 3, 5, 6, 7 }
  6550       } else { // cnt2 = { 3, 5, 6, 7 } || (ae == StrIntrinsicNode::UL && cnt2 ={2, ..., 7})
  6503         // Array header size is 12 bytes in 32-bit VM
  6551         // Array header size is 12 bytes in 32-bit VM
  6504         // + 6 bytes for 3 chars == 18 bytes,
  6552         // + 6 bytes for 3 chars == 18 bytes,
  6505         // enough space to load vec and shift.
  6553         // enough space to load vec and shift.
  6506         assert(HeapWordSize*TypeArrayKlass::header_size() >= 12,"sanity");
  6554         assert(HeapWordSize*TypeArrayKlass::header_size() >= 12,"sanity");
  6507         movdqu(vec, Address(str2, (int_cnt2*2)-16));
  6555         if (ae == StrIntrinsicNode::UL) {
  6508         psrldq(vec, 16-(int_cnt2*2));
  6556           int tail_off = int_cnt2-8;
       
  6557           pmovzxbw(vec, Address(str2, tail_off));
       
  6558           psrldq(vec, -2*tail_off);
       
  6559         }
       
  6560         else {
       
  6561           int tail_off = int_cnt2*(1<<scale2);
       
  6562           movdqu(vec, Address(str2, tail_off-16));
       
  6563           psrldq(vec, 16-tail_off);
       
  6564         }
  6509       }
  6565       }
  6510     } else { // not constant substring
  6566     } else { // not constant substring
  6511       cmpl(cnt2, 8);
  6567       cmpl(cnt2, stride);
  6512       jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough
  6568       jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough
  6513 
  6569 
  6514       // We can read beyond string if str+16 does not cross page boundary
  6570       // We can read beyond string if str+16 does not cross page boundary
  6515       // since heaps are aligned and mapped by pages.
  6571       // since heaps are aligned and mapped by pages.
  6516       assert(os::vm_page_size() < (int)G, "default page should be small");
  6572       assert(os::vm_page_size() < (int)G, "default page should be small");
  6519       cmpl(result, (os::vm_page_size()-16));
  6575       cmpl(result, (os::vm_page_size()-16));
  6520       jccb(Assembler::belowEqual, CHECK_STR);
  6576       jccb(Assembler::belowEqual, CHECK_STR);
  6521 
  6577 
  6522       // Move small strings to stack to allow load 16 bytes into vec.
  6578       // Move small strings to stack to allow load 16 bytes into vec.
  6523       subptr(rsp, 16);
  6579       subptr(rsp, 16);
  6524       int stk_offset = wordSize-2;
  6580       int stk_offset = wordSize-(1<<scale2);
  6525       push(cnt2);
  6581       push(cnt2);
  6526 
  6582 
  6527       bind(COPY_SUBSTR);
  6583       bind(COPY_SUBSTR);
  6528       load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2));
  6584       if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL) {
  6529       movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
  6585         load_unsigned_byte(result, Address(str2, cnt2, scale2, -1));
       
  6586         movb(Address(rsp, cnt2, scale2, stk_offset), result);
       
  6587       } else if (ae == StrIntrinsicNode::UU) {
       
  6588         load_unsigned_short(result, Address(str2, cnt2, scale2, -2));
       
  6589         movw(Address(rsp, cnt2, scale2, stk_offset), result);
       
  6590       }
  6530       decrement(cnt2);
  6591       decrement(cnt2);
  6531       jccb(Assembler::notZero, COPY_SUBSTR);
  6592       jccb(Assembler::notZero, COPY_SUBSTR);
  6532 
  6593 
  6533       pop(cnt2);
  6594       pop(cnt2);
  6534       movptr(str2, rsp);  // New substring address
  6595       movptr(str2, rsp);  // New substring address
  6535     } // non constant
  6596     } // non constant
  6536 
  6597 
  6537     bind(CHECK_STR);
  6598     bind(CHECK_STR);
  6538     cmpl(cnt1, 8);
  6599     cmpl(cnt1, stride);
  6539     jccb(Assembler::aboveEqual, BIG_STRINGS);
  6600     jccb(Assembler::aboveEqual, BIG_STRINGS);
  6540 
  6601 
  6541     // Check cross page boundary.
  6602     // Check cross page boundary.
  6542     movl(result, str1); // We need only low 32 bits
  6603     movl(result, str1); // We need only low 32 bits
  6543     andl(result, (os::vm_page_size()-1));
  6604     andl(result, (os::vm_page_size()-1));
  6544     cmpl(result, (os::vm_page_size()-16));
  6605     cmpl(result, (os::vm_page_size()-16));
  6545     jccb(Assembler::belowEqual, BIG_STRINGS);
  6606     jccb(Assembler::belowEqual, BIG_STRINGS);
  6546 
  6607 
  6547     subptr(rsp, 16);
  6608     subptr(rsp, 16);
  6548     int stk_offset = -2;
  6609     int stk_offset = -(1<<scale1);
  6549     if (int_cnt2 < 0) { // not constant
  6610     if (int_cnt2 < 0) { // not constant
  6550       push(cnt2);
  6611       push(cnt2);
  6551       stk_offset += wordSize;
  6612       stk_offset += wordSize;
  6552     }
  6613     }
  6553     movl(cnt2, cnt1);
  6614     movl(cnt2, cnt1);
  6554 
  6615 
  6555     bind(COPY_STR);
  6616     bind(COPY_STR);
  6556     load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2));
  6617     if (ae == StrIntrinsicNode::LL) {
  6557     movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
  6618       load_unsigned_byte(result, Address(str1, cnt2, scale1, -1));
       
  6619       movb(Address(rsp, cnt2, scale1, stk_offset), result);
       
  6620     } else {
       
  6621       load_unsigned_short(result, Address(str1, cnt2, scale1, -2));
       
  6622       movw(Address(rsp, cnt2, scale1, stk_offset), result);
       
  6623     }
  6558     decrement(cnt2);
  6624     decrement(cnt2);
  6559     jccb(Assembler::notZero, COPY_STR);
  6625     jccb(Assembler::notZero, COPY_STR);
  6560 
  6626 
  6561     if (int_cnt2 < 0) { // not constant
  6627     if (int_cnt2 < 0) { // not constant
  6562       pop(cnt2);
  6628       pop(cnt2);
  6564     movptr(str1, rsp);  // New string address
  6630     movptr(str1, rsp);  // New string address
  6565 
  6631 
  6566     bind(BIG_STRINGS);
  6632     bind(BIG_STRINGS);
  6567     // Load substring.
  6633     // Load substring.
  6568     if (int_cnt2 < 0) { // -1
  6634     if (int_cnt2 < 0) { // -1
  6569       movdqu(vec, Address(str2, 0));
  6635       if (ae == StrIntrinsicNode::UL) {
       
  6636         pmovzxbw(vec, Address(str2, 0));
       
  6637       } else {
       
  6638         movdqu(vec, Address(str2, 0));
       
  6639       }
  6570       push(cnt2);       // substr count
  6640       push(cnt2);       // substr count
  6571       push(str2);       // substr addr
  6641       push(str2);       // substr addr
  6572       push(str1);       // string addr
  6642       push(str1);       // string addr
  6573     } else {
  6643     } else {
  6574       // Small (< 8 chars) constant substrings are loaded already.
  6644       // Small (< 8 chars) constant substrings are loaded already.
  6595     // Reload substr for rescan, this code
  6665     // Reload substr for rescan, this code
  6596     // is executed only for large substrings (> 8 chars)
  6666     // is executed only for large substrings (> 8 chars)
  6597     bind(RELOAD_SUBSTR);
  6667     bind(RELOAD_SUBSTR);
  6598     movptr(str2, Address(rsp, 2*wordSize));
  6668     movptr(str2, Address(rsp, 2*wordSize));
  6599     movl(cnt2, Address(rsp, 3*wordSize));
  6669     movl(cnt2, Address(rsp, 3*wordSize));
  6600     movdqu(vec, Address(str2, 0));
  6670     if (ae == StrIntrinsicNode::UL) {
       
  6671       pmovzxbw(vec, Address(str2, 0));
       
  6672     } else {
       
  6673       movdqu(vec, Address(str2, 0));
       
  6674     }
  6601     // We came here after the beginning of the substring was
  6675     // We came here after the beginning of the substring was
  6602     // matched but the rest of it was not so we need to search
  6676     // matched but the rest of it was not so we need to search
  6603     // again. Start from the next element after the previous match.
  6677     // again. Start from the next element after the previous match.
  6604     subptr(str1, result); // Restore counter
  6678     subptr(str1, result); // Restore counter
  6605     shrl(str1, 1);
  6679     if (ae == StrIntrinsicNode::UU || ae == StrIntrinsicNode::UL) {
       
  6680       shrl(str1, 1);
       
  6681     }
  6606     addl(cnt1, str1);
  6682     addl(cnt1, str1);
  6607     decrementl(cnt1);   // Shift to next element
  6683     decrementl(cnt1);   // Shift to next element
  6608     cmpl(cnt1, cnt2);
  6684     cmpl(cnt1, cnt2);
  6609     jccb(Assembler::negative, RET_NOT_FOUND);  // Left less then substring
  6685     jccb(Assembler::negative, RET_NOT_FOUND);  // Left less then substring
  6610 
  6686 
  6611     addptr(result, 2);
  6687     addptr(result, (1<<scale1));
  6612   } // non constant
  6688   } // non constant
  6613 
  6689 
  6614   // Scan string for start of substr in 16-byte vectors
  6690   // Scan string for start of substr in 16-byte vectors
  6615   bind(SCAN_TO_SUBSTR);
  6691   bind(SCAN_TO_SUBSTR);
  6616   assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
  6692   assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
  6617   pcmpestri(vec, Address(result, 0), 0x0d);
  6693   pcmpestri(vec, Address(result, 0), mode);
  6618   jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
  6694   jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
  6619   subl(cnt1, 8);
  6695   subl(cnt1, stride);
  6620   jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
  6696   jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
  6621   cmpl(cnt1, cnt2);
  6697   cmpl(cnt1, cnt2);
  6622   jccb(Assembler::negative, RET_NOT_FOUND);  // Left less then substring
  6698   jccb(Assembler::negative, RET_NOT_FOUND);  // Left less then substring
  6623   addptr(result, 16);
  6699   addptr(result, 16);
  6624 
  6700 
  6625   bind(ADJUST_STR);
  6701   bind(ADJUST_STR);
  6626   cmpl(cnt1, 8); // Do not read beyond string
  6702   cmpl(cnt1, stride); // Do not read beyond string
  6627   jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
  6703   jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
  6628   // Back-up string to avoid reading beyond string.
  6704   // Back-up string to avoid reading beyond string.
  6629   lea(result, Address(result, cnt1, Address::times_2, -16));
  6705   lea(result, Address(result, cnt1, scale1, -16));
  6630   movl(cnt1, 8);
  6706   movl(cnt1, stride);
  6631   jmpb(SCAN_TO_SUBSTR);
  6707   jmpb(SCAN_TO_SUBSTR);
  6632 
  6708 
  6633   // Found a potential substr
  6709   // Found a potential substr
  6634   bind(FOUND_CANDIDATE);
  6710   bind(FOUND_CANDIDATE);
  6635   // After pcmpestri tmp(rcx) contains matched element index
  6711   // After pcmpestri tmp(rcx) contains matched element index
  6644   movl(result, -1);
  6720   movl(result, -1);
  6645   jmpb(CLEANUP);
  6721   jmpb(CLEANUP);
  6646 
  6722 
  6647   bind(FOUND_SUBSTR);
  6723   bind(FOUND_SUBSTR);
  6648   // Compute start addr of substr
  6724   // Compute start addr of substr
  6649   lea(result, Address(result, tmp, Address::times_2));
  6725   lea(result, Address(result, tmp, scale1));
  6650 
       
  6651   if (int_cnt2 > 0) { // Constant substring
  6726   if (int_cnt2 > 0) { // Constant substring
  6652     // Repeat search for small substring (< 8 chars)
  6727     // Repeat search for small substring (< 8 chars)
  6653     // from new point without reloading substring.
  6728     // from new point without reloading substring.
  6654     // Have to check that we don't read beyond string.
  6729     // Have to check that we don't read beyond string.
  6655     cmpl(tmp, 8-int_cnt2);
  6730     cmpl(tmp, stride-int_cnt2);
  6656     jccb(Assembler::greater, ADJUST_STR);
  6731     jccb(Assembler::greater, ADJUST_STR);
  6657     // Fall through if matched whole substring.
  6732     // Fall through if matched whole substring.
  6658   } else { // non constant
  6733   } else { // non constant
  6659     assert(int_cnt2 == -1, "should be != 0");
  6734     assert(int_cnt2 == -1, "should be != 0");
  6660 
  6735 
  6661     addl(tmp, cnt2);
  6736     addl(tmp, cnt2);
  6662     // Found result if we matched whole substring.
  6737     // Found result if we matched whole substring.
  6663     cmpl(tmp, 8);
  6738     cmpl(tmp, stride);
  6664     jccb(Assembler::lessEqual, RET_FOUND);
  6739     jccb(Assembler::lessEqual, RET_FOUND);
  6665 
  6740 
  6666     // Repeat search for small substring (<= 8 chars)
  6741     // Repeat search for small substring (<= 8 chars)
  6667     // from new point 'str1' without reloading substring.
  6742     // from new point 'str1' without reloading substring.
  6668     cmpl(cnt2, 8);
  6743     cmpl(cnt2, stride);
  6669     // Have to check that we don't read beyond string.
  6744     // Have to check that we don't read beyond string.
  6670     jccb(Assembler::lessEqual, ADJUST_STR);
  6745     jccb(Assembler::lessEqual, ADJUST_STR);
  6671 
  6746 
  6672     Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;
  6747     Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;
  6673     // Compare the rest of substring (> 8 chars).
  6748     // Compare the rest of substring (> 8 chars).
  6676     cmpl(tmp, cnt2);
  6751     cmpl(tmp, cnt2);
  6677     // First 8 chars are already matched.
  6752     // First 8 chars are already matched.
  6678     jccb(Assembler::equal, CHECK_NEXT);
  6753     jccb(Assembler::equal, CHECK_NEXT);
  6679 
  6754 
  6680     bind(SCAN_SUBSTR);
  6755     bind(SCAN_SUBSTR);
  6681     pcmpestri(vec, Address(str1, 0), 0x0d);
  6756     pcmpestri(vec, Address(str1, 0), mode);
  6682     // Need to reload strings pointers if not matched whole vector
  6757     // Need to reload strings pointers if not matched whole vector
  6683     jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
  6758     jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
  6684 
  6759 
  6685     bind(CHECK_NEXT);
  6760     bind(CHECK_NEXT);
  6686     subl(cnt2, 8);
  6761     subl(cnt2, stride);
  6687     jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring
  6762     jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring
  6688     addptr(str1, 16);
  6763     addptr(str1, 16);
  6689     addptr(str2, 16);
  6764     if (ae == StrIntrinsicNode::UL) {
  6690     subl(cnt1, 8);
  6765       addptr(str2, 8);
  6691     cmpl(cnt2, 8); // Do not read beyond substring
  6766     } else {
       
  6767       addptr(str2, 16);
       
  6768     }
       
  6769     subl(cnt1, stride);
       
  6770     cmpl(cnt2, stride); // Do not read beyond substring
  6692     jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR);
  6771     jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR);
  6693     // Back-up strings to avoid reading beyond substring.
  6772     // Back-up strings to avoid reading beyond substring.
  6694     lea(str2, Address(str2, cnt2, Address::times_2, -16));
  6773 
  6695     lea(str1, Address(str1, cnt2, Address::times_2, -16));
  6774     if (ae == StrIntrinsicNode::UL) {
       
  6775       lea(str2, Address(str2, cnt2, scale2, -8));
       
  6776       lea(str1, Address(str1, cnt2, scale1, -16));
       
  6777     } else {
       
  6778       lea(str2, Address(str2, cnt2, scale2, -16));
       
  6779       lea(str1, Address(str1, cnt2, scale1, -16));
       
  6780     }
  6696     subl(cnt1, cnt2);
  6781     subl(cnt1, cnt2);
  6697     movl(cnt2, 8);
  6782     movl(cnt2, stride);
  6698     addl(cnt1, 8);
  6783     addl(cnt1, stride);
  6699     bind(CONT_SCAN_SUBSTR);
  6784     bind(CONT_SCAN_SUBSTR);
  6700     movdqu(vec, Address(str2, 0));
  6785     if (ae == StrIntrinsicNode::UL) {
       
  6786       pmovzxbw(vec, Address(str2, 0));
       
  6787     } else {
       
  6788       movdqu(vec, Address(str2, 0));
       
  6789     }
  6701     jmpb(SCAN_SUBSTR);
  6790     jmpb(SCAN_SUBSTR);
  6702 
  6791 
  6703     bind(RET_FOUND_LONG);
  6792     bind(RET_FOUND_LONG);
  6704     movptr(str1, Address(rsp, wordSize));
  6793     movptr(str1, Address(rsp, wordSize));
  6705   } // non constant
  6794   } // non constant
  6706 
  6795 
  6707   bind(RET_FOUND);
  6796   bind(RET_FOUND);
  6708   // Compute substr offset
  6797   // Compute substr offset
  6709   subptr(result, str1);
  6798   subptr(result, str1);
  6710   shrl(result, 1); // index
  6799   if (ae == StrIntrinsicNode::UU || ae == StrIntrinsicNode::UL) {
  6711 
  6800     shrl(result, 1); // index
       
  6801   }
  6712   bind(CLEANUP);
  6802   bind(CLEANUP);
  6713   pop(rsp); // restore SP
  6803   pop(rsp); // restore SP
  6714 
  6804 
  6715 } // string_indexof
  6805 } // string_indexof
  6716 
  6806 
  6717 // Compare strings.
  6807 void MacroAssembler::string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
       
  6808                                          XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp) {
       
  6809   ShortBranchVerifier sbv(this);
       
  6810   assert(UseSSE42Intrinsics, "SSE4.2 is required");
       
  6811 
       
  6812   int stride = 8;
       
  6813 
       
  6814   Label FOUND_CHAR, SCAN_TO_CHAR, SCAN_TO_CHAR_LOOP,
       
  6815         SCAN_TO_8_CHAR, SCAN_TO_8_CHAR_LOOP, SCAN_TO_16_CHAR_LOOP,
       
  6816         RET_NOT_FOUND, SCAN_TO_8_CHAR_INIT,
       
  6817         FOUND_SEQ_CHAR, DONE_LABEL;
       
  6818 
       
  6819   movptr(result, str1);
       
  6820   if (UseAVX >= 2) {
       
  6821     cmpl(cnt1, stride);
       
  6822     jccb(Assembler::less, SCAN_TO_CHAR_LOOP);
       
  6823     cmpl(cnt1, 2*stride);
       
  6824     jccb(Assembler::less, SCAN_TO_8_CHAR_INIT);
       
  6825     movdl(vec1, ch);
       
  6826     vpbroadcastw(vec1, vec1);
       
  6827     vpxor(vec2, vec2);
       
  6828     movl(tmp, cnt1);
       
  6829     andl(tmp, 0xFFFFFFF0);  //vector count (in chars)
       
  6830     andl(cnt1,0x0000000F);  //tail count (in chars)
       
  6831 
       
  6832     bind(SCAN_TO_16_CHAR_LOOP);
       
  6833     vmovdqu(vec3, Address(result, 0));
       
  6834     vpcmpeqw(vec3, vec3, vec1, true);
       
  6835     vptest(vec2, vec3);
       
  6836     jcc(Assembler::carryClear, FOUND_CHAR);
       
  6837     addptr(result, 32);
       
  6838     subl(tmp, 2*stride);
       
  6839     jccb(Assembler::notZero, SCAN_TO_16_CHAR_LOOP);
       
  6840     jmp(SCAN_TO_8_CHAR);
       
  6841     bind(SCAN_TO_8_CHAR_INIT);
       
  6842     movdl(vec1, ch);
       
  6843     pshuflw(vec1, vec1, 0x00);
       
  6844     pshufd(vec1, vec1, 0);
       
  6845     pxor(vec2, vec2);
       
  6846   }
       
  6847   if (UseAVX >= 2 || UseSSE42Intrinsics) {
       
  6848     bind(SCAN_TO_8_CHAR);
       
  6849     cmpl(cnt1, stride);
       
  6850     if (UseAVX >= 2) {
       
  6851       jccb(Assembler::less, SCAN_TO_CHAR);
       
  6852     }
       
  6853     if (!(UseAVX >= 2)) {
       
  6854       jccb(Assembler::less, SCAN_TO_CHAR_LOOP);
       
  6855       movdl(vec1, ch);
       
  6856       pshuflw(vec1, vec1, 0x00);
       
  6857       pshufd(vec1, vec1, 0);
       
  6858       pxor(vec2, vec2);
       
  6859     }
       
  6860     movl(tmp, cnt1);
       
  6861     andl(tmp, 0xFFFFFFF8);  //vector count (in chars)
       
  6862     andl(cnt1,0x00000007);  //tail count (in chars)
       
  6863 
       
  6864     bind(SCAN_TO_8_CHAR_LOOP);
       
  6865     movdqu(vec3, Address(result, 0));
       
  6866     pcmpeqw(vec3, vec1);
       
  6867     ptest(vec2, vec3);
       
  6868     jcc(Assembler::carryClear, FOUND_CHAR);
       
  6869     addptr(result, 16);
       
  6870     subl(tmp, stride);
       
  6871     jccb(Assembler::notZero, SCAN_TO_8_CHAR_LOOP);
       
  6872   }
       
  6873   bind(SCAN_TO_CHAR);
       
  6874   testl(cnt1, cnt1);
       
  6875   jcc(Assembler::zero, RET_NOT_FOUND);
       
  6876 
       
  6877   bind(SCAN_TO_CHAR_LOOP);
       
  6878   load_unsigned_short(tmp, Address(result, 0));
       
  6879   cmpl(ch, tmp);
       
  6880   jccb(Assembler::equal, FOUND_SEQ_CHAR);
       
  6881   addptr(result, 2);
       
  6882   subl(cnt1, 1);
       
  6883   jccb(Assembler::zero, RET_NOT_FOUND);
       
  6884   jmp(SCAN_TO_CHAR_LOOP);
       
  6885 
       
  6886   bind(RET_NOT_FOUND);
       
  6887   movl(result, -1);
       
  6888   jmpb(DONE_LABEL);
       
  6889 
       
  6890   if (UseAVX >= 2 || UseSSE42Intrinsics) {
       
  6891     bind(FOUND_CHAR);
       
  6892     if (UseAVX >= 2) {
       
  6893       vpmovmskb(tmp, vec3);
       
  6894     } else {
       
  6895       pmovmskb(tmp, vec3);
       
  6896     }
       
  6897     bsfl(ch, tmp);
       
  6898     addl(result, ch);
       
  6899   }
       
  6900 
       
  6901   bind(FOUND_SEQ_CHAR);
       
  6902   subptr(result, str1);
       
  6903   shrl(result, 1);
       
  6904 
       
  6905   bind(DONE_LABEL);
       
  6906 } // string_indexof_char
       
  6907 
       
  6908 // helper function for string_compare
       
  6909 void MacroAssembler::load_next_elements(Register elem1, Register elem2, Register str1, Register str2,
       
  6910                                         Address::ScaleFactor scale, Address::ScaleFactor scale1,
       
  6911                                         Address::ScaleFactor scale2, Register index, int ae) {
       
  6912   if (ae == StrIntrinsicNode::LL) {
       
  6913     load_unsigned_byte(elem1, Address(str1, index, scale, 0));
       
  6914     load_unsigned_byte(elem2, Address(str2, index, scale, 0));
       
  6915   } else if (ae == StrIntrinsicNode::UU) {
       
  6916     load_unsigned_short(elem1, Address(str1, index, scale, 0));
       
  6917     load_unsigned_short(elem2, Address(str2, index, scale, 0));
       
  6918   } else {
       
  6919     load_unsigned_byte(elem1, Address(str1, index, scale1, 0));
       
  6920     load_unsigned_short(elem2, Address(str2, index, scale2, 0));
       
  6921   }
       
  6922 }
       
  6923 
       
  6924 // Compare strings, used for char[] and byte[].
  6718 void MacroAssembler::string_compare(Register str1, Register str2,
  6925 void MacroAssembler::string_compare(Register str1, Register str2,
  6719                                     Register cnt1, Register cnt2, Register result,
  6926                                     Register cnt1, Register cnt2, Register result,
  6720                                     XMMRegister vec1) {
  6927                                     XMMRegister vec1, int ae) {
  6721   ShortBranchVerifier sbv(this);
  6928   ShortBranchVerifier sbv(this);
  6722   Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
  6929   Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
  6723 
  6930   int stride, stride2, adr_stride, adr_stride1, adr_stride2;
       
  6931   Address::ScaleFactor scale, scale1, scale2;
       
  6932 
       
  6933   if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
       
  6934     shrl(cnt2, 1);
       
  6935   }
  6724   // Compute the minimum of the string lengths and the
  6936   // Compute the minimum of the string lengths and the
  6725   // difference of the string lengths (stack).
  6937   // difference of the string lengths (stack).
  6726   // Do the conditional move stuff
  6938   // Do the conditional move stuff
  6727   movl(result, cnt1);
  6939   movl(result, cnt1);
  6728   subl(cnt1, cnt2);
  6940   subl(cnt1, cnt2);
  6730   cmov32(Assembler::lessEqual, cnt2, result);
  6942   cmov32(Assembler::lessEqual, cnt2, result);
  6731 
  6943 
  6732   // Is the minimum length zero?
  6944   // Is the minimum length zero?
  6733   testl(cnt2, cnt2);
  6945   testl(cnt2, cnt2);
  6734   jcc(Assembler::zero, LENGTH_DIFF_LABEL);
  6946   jcc(Assembler::zero, LENGTH_DIFF_LABEL);
  6735 
  6947   if (ae == StrIntrinsicNode::LL) {
  6736   // Compare first characters
  6948     // Load first bytes
  6737   load_unsigned_short(result, Address(str1, 0));
  6949     load_unsigned_byte(result, Address(str1, 0));
  6738   load_unsigned_short(cnt1, Address(str2, 0));
  6950     load_unsigned_byte(cnt1, Address(str2, 0));
       
  6951   } else if (ae == StrIntrinsicNode::UU) {
       
  6952     // Load first characters
       
  6953     load_unsigned_short(result, Address(str1, 0));
       
  6954     load_unsigned_short(cnt1, Address(str2, 0));
       
  6955   } else {
       
  6956     load_unsigned_byte(result, Address(str1, 0));
       
  6957     load_unsigned_short(cnt1, Address(str2, 0));
       
  6958   }
  6739   subl(result, cnt1);
  6959   subl(result, cnt1);
  6740   jcc(Assembler::notZero,  POP_LABEL);
  6960   jcc(Assembler::notZero,  POP_LABEL);
       
  6961 
       
  6962   if (ae == StrIntrinsicNode::UU) {
       
  6963     // Divide length by 2 to get number of chars
       
  6964     shrl(cnt2, 1);
       
  6965   }
  6741   cmpl(cnt2, 1);
  6966   cmpl(cnt2, 1);
  6742   jcc(Assembler::equal, LENGTH_DIFF_LABEL);
  6967   jcc(Assembler::equal, LENGTH_DIFF_LABEL);
  6743 
  6968 
  6744   // Check if the strings start at the same location.
  6969   // Check if the strings start at the same location and setup scale and stride
  6745   cmpptr(str1, str2);
  6970   if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
  6746   jcc(Assembler::equal, LENGTH_DIFF_LABEL);
  6971     cmpptr(str1, str2);
  6747 
  6972     jcc(Assembler::equal, LENGTH_DIFF_LABEL);
  6748   Address::ScaleFactor scale = Address::times_2;
  6973     if (ae == StrIntrinsicNode::LL) {
  6749   int stride = 8;
  6974       scale = Address::times_1;
       
  6975       stride = 16;
       
  6976     } else {
       
  6977       scale = Address::times_2;
       
  6978       stride = 8;
       
  6979     }
       
  6980   } else {
       
  6981     scale1 = Address::times_1;
       
  6982     scale2 = Address::times_2;
       
  6983     stride = 8;
       
  6984   }
  6750 
  6985 
  6751   if (UseAVX >= 2 && UseSSE42Intrinsics) {
  6986   if (UseAVX >= 2 && UseSSE42Intrinsics) {
  6752     Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_WIDE_TAIL, COMPARE_SMALL_STR;
  6987     Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_WIDE_TAIL, COMPARE_SMALL_STR;
  6753     Label COMPARE_WIDE_VECTORS_LOOP, COMPARE_16_CHARS, COMPARE_INDEX_CHAR;
  6988     Label COMPARE_WIDE_VECTORS_LOOP, COMPARE_16_CHARS, COMPARE_INDEX_CHAR;
  6754     Label COMPARE_TAIL_LONG;
  6989     Label COMPARE_TAIL_LONG;
  6755     int pcmpmask = 0x19;
  6990     int pcmpmask = 0x19;
       
  6991     if (ae == StrIntrinsicNode::LL) {
       
  6992       pcmpmask &= ~0x01;
       
  6993     }
  6756 
  6994 
  6757     // Setup to compare 16-chars (32-bytes) vectors,
  6995     // Setup to compare 16-chars (32-bytes) vectors,
  6758     // start from first character again because it has aligned address.
  6996     // start from first character again because it has aligned address.
  6759     int stride2 = 16;
  6997     if (ae == StrIntrinsicNode::LL) {
  6760     int adr_stride  = stride  << scale;
  6998       stride2 = 32;
       
  6999     } else {
       
  7000       stride2 = 16;
       
  7001     }
       
  7002     if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
       
  7003       adr_stride = stride << scale;
       
  7004     } else {
       
  7005       adr_stride1 = 8;  //stride << scale1;
       
  7006       adr_stride2 = 16; //stride << scale2;
       
  7007     }
  6761 
  7008 
  6762     assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");
  7009     assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");
  6763     // rax and rdx are used by pcmpestri as elements counters
  7010     // rax and rdx are used by pcmpestri as elements counters
  6764     movl(result, cnt2);
  7011     movl(result, cnt2);
  6765     andl(cnt2, ~(stride2-1));   // cnt2 holds the vector count
  7012     andl(cnt2, ~(stride2-1));   // cnt2 holds the vector count
  6766     jcc(Assembler::zero, COMPARE_TAIL_LONG);
  7013     jcc(Assembler::zero, COMPARE_TAIL_LONG);
  6767 
  7014 
  6768     // fast path : compare first 2 8-char vectors.
  7015     // fast path : compare first 2 8-char vectors.
  6769     bind(COMPARE_16_CHARS);
  7016     bind(COMPARE_16_CHARS);
  6770     movdqu(vec1, Address(str1, 0));
  7017     if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
       
  7018       movdqu(vec1, Address(str1, 0));
       
  7019     } else {
       
  7020       pmovzxbw(vec1, Address(str1, 0));
       
  7021     }
  6771     pcmpestri(vec1, Address(str2, 0), pcmpmask);
  7022     pcmpestri(vec1, Address(str2, 0), pcmpmask);
  6772     jccb(Assembler::below, COMPARE_INDEX_CHAR);
  7023     jccb(Assembler::below, COMPARE_INDEX_CHAR);
  6773 
  7024 
  6774     movdqu(vec1, Address(str1, adr_stride));
  7025     if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
  6775     pcmpestri(vec1, Address(str2, adr_stride), pcmpmask);
  7026       movdqu(vec1, Address(str1, adr_stride));
       
  7027       pcmpestri(vec1, Address(str2, adr_stride), pcmpmask);
       
  7028     } else {
       
  7029       pmovzxbw(vec1, Address(str1, adr_stride1));
       
  7030       pcmpestri(vec1, Address(str2, adr_stride2), pcmpmask);
       
  7031     }
  6776     jccb(Assembler::aboveEqual, COMPARE_WIDE_VECTORS);
  7032     jccb(Assembler::aboveEqual, COMPARE_WIDE_VECTORS);
  6777     addl(cnt1, stride);
  7033     addl(cnt1, stride);
  6778 
  7034 
  6779     // Compare the characters at index in cnt1
  7035     // Compare the characters at index in cnt1
  6780     bind(COMPARE_INDEX_CHAR); //cnt1 has the offset of the mismatching character
  7036     bind(COMPARE_INDEX_CHAR); // cnt1 has the offset of the mismatching character
  6781     load_unsigned_short(result, Address(str1, cnt1, scale));
  7037     load_next_elements(result, cnt2, str1, str2, scale, scale1, scale2, cnt1, ae);
  6782     load_unsigned_short(cnt2, Address(str2, cnt1, scale));
       
  6783     subl(result, cnt2);
  7038     subl(result, cnt2);
  6784     jmp(POP_LABEL);
  7039     jmp(POP_LABEL);
  6785 
  7040 
  6786     // Setup the registers to start vector comparison loop
  7041     // Setup the registers to start vector comparison loop
  6787     bind(COMPARE_WIDE_VECTORS);
  7042     bind(COMPARE_WIDE_VECTORS);
  6788     lea(str1, Address(str1, result, scale));
  7043     if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
  6789     lea(str2, Address(str2, result, scale));
  7044       lea(str1, Address(str1, result, scale));
       
  7045       lea(str2, Address(str2, result, scale));
       
  7046     } else {
       
  7047       lea(str1, Address(str1, result, scale1));
       
  7048       lea(str2, Address(str2, result, scale2));
       
  7049     }
  6790     subl(result, stride2);
  7050     subl(result, stride2);
  6791     subl(cnt2, stride2);
  7051     subl(cnt2, stride2);
  6792     jccb(Assembler::zero, COMPARE_WIDE_TAIL);
  7052     jccb(Assembler::zero, COMPARE_WIDE_TAIL);
  6793     negptr(result);
  7053     negptr(result);
  6794 
  7054 
  6795     //  In a loop, compare 16-chars (32-bytes) at once using (vpxor+vptest)
  7055     //  In a loop, compare 16-chars (32-bytes) at once using (vpxor+vptest)
  6796     bind(COMPARE_WIDE_VECTORS_LOOP);
  7056     bind(COMPARE_WIDE_VECTORS_LOOP);
  6797     vmovdqu(vec1, Address(str1, result, scale));
  7057     if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
  6798     vpxor(vec1, Address(str2, result, scale));
  7058       vmovdqu(vec1, Address(str1, result, scale));
       
  7059       vpxor(vec1, Address(str2, result, scale));
       
  7060     } else {
       
  7061       vpmovzxbw(vec1, Address(str1, result, scale1));
       
  7062       vpxor(vec1, Address(str2, result, scale2));
       
  7063     }
  6799     vptest(vec1, vec1);
  7064     vptest(vec1, vec1);
  6800     jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
  7065     jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
  6801     addptr(result, stride2);
  7066     addptr(result, stride2);
  6802     subl(cnt2, stride2);
  7067     subl(cnt2, stride2);
  6803     jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP);
  7068     jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP);
  6816 
  7081 
  6817     // Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors.
  7082     // Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors.
  6818     bind(VECTOR_NOT_EQUAL);
  7083     bind(VECTOR_NOT_EQUAL);
  6819     // clean upper bits of YMM registers
  7084     // clean upper bits of YMM registers
  6820     vpxor(vec1, vec1);
  7085     vpxor(vec1, vec1);
  6821     lea(str1, Address(str1, result, scale));
  7086     if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
  6822     lea(str2, Address(str2, result, scale));
  7087       lea(str1, Address(str1, result, scale));
       
  7088       lea(str2, Address(str2, result, scale));
       
  7089     } else {
       
  7090       lea(str1, Address(str1, result, scale1));
       
  7091       lea(str2, Address(str2, result, scale2));
       
  7092     }
  6823     jmp(COMPARE_16_CHARS);
  7093     jmp(COMPARE_16_CHARS);
  6824 
  7094 
  6825     // Compare tail chars, length between 1 to 15 chars
  7095     // Compare tail chars, length between 1 to 15 chars
  6826     bind(COMPARE_TAIL_LONG);
  7096     bind(COMPARE_TAIL_LONG);
  6827     movl(cnt2, result);
  7097     movl(cnt2, result);
  6828     cmpl(cnt2, stride);
  7098     cmpl(cnt2, stride);
  6829     jccb(Assembler::less, COMPARE_SMALL_STR);
  7099     jccb(Assembler::less, COMPARE_SMALL_STR);
  6830 
  7100 
  6831     movdqu(vec1, Address(str1, 0));
  7101     if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
       
  7102       movdqu(vec1, Address(str1, 0));
       
  7103     } else {
       
  7104       pmovzxbw(vec1, Address(str1, 0));
       
  7105     }
  6832     pcmpestri(vec1, Address(str2, 0), pcmpmask);
  7106     pcmpestri(vec1, Address(str2, 0), pcmpmask);
  6833     jcc(Assembler::below, COMPARE_INDEX_CHAR);
  7107     jcc(Assembler::below, COMPARE_INDEX_CHAR);
  6834     subptr(cnt2, stride);
  7108     subptr(cnt2, stride);
  6835     jccb(Assembler::zero, LENGTH_DIFF_LABEL);
  7109     jccb(Assembler::zero, LENGTH_DIFF_LABEL);
  6836     lea(str1, Address(str1, result, scale));
  7110     if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
  6837     lea(str2, Address(str2, result, scale));
  7111       lea(str1, Address(str1, result, scale));
       
  7112       lea(str2, Address(str2, result, scale));
       
  7113     } else {
       
  7114       lea(str1, Address(str1, result, scale1));
       
  7115       lea(str2, Address(str2, result, scale2));
       
  7116     }
  6838     negptr(cnt2);
  7117     negptr(cnt2);
  6839     jmpb(WHILE_HEAD_LABEL);
  7118     jmpb(WHILE_HEAD_LABEL);
  6840 
  7119 
  6841     bind(COMPARE_SMALL_STR);
  7120     bind(COMPARE_SMALL_STR);
  6842   } else if (UseSSE42Intrinsics) {
  7121   } else if (UseSSE42Intrinsics) {
  6844     int pcmpmask = 0x19;
  7123     int pcmpmask = 0x19;
  6845     // Setup to compare 8-char (16-byte) vectors,
  7124     // Setup to compare 8-char (16-byte) vectors,
  6846     // start from first character again because it has aligned address.
  7125     // start from first character again because it has aligned address.
  6847     movl(result, cnt2);
  7126     movl(result, cnt2);
  6848     andl(cnt2, ~(stride - 1));   // cnt2 holds the vector count
  7127     andl(cnt2, ~(stride - 1));   // cnt2 holds the vector count
       
  7128     if (ae == StrIntrinsicNode::LL) {
       
  7129       pcmpmask &= ~0x01;
       
  7130     }
  6849     jccb(Assembler::zero, COMPARE_TAIL);
  7131     jccb(Assembler::zero, COMPARE_TAIL);
  6850 
  7132     if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
  6851     lea(str1, Address(str1, result, scale));
  7133       lea(str1, Address(str1, result, scale));
  6852     lea(str2, Address(str2, result, scale));
  7134       lea(str2, Address(str2, result, scale));
       
  7135     } else {
       
  7136       lea(str1, Address(str1, result, scale1));
       
  7137       lea(str2, Address(str2, result, scale2));
       
  7138     }
  6853     negptr(result);
  7139     negptr(result);
  6854 
  7140 
  6855     // pcmpestri
  7141     // pcmpestri
  6856     //   inputs:
  7142     //   inputs:
  6857     //     vec1- substring
  7143     //     vec1- substring
  6863     //   outputs:
  7149     //   outputs:
  6864     //     rcx - first mismatched element index
  7150     //     rcx - first mismatched element index
  6865     assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");
  7151     assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");
  6866 
  7152 
  6867     bind(COMPARE_WIDE_VECTORS);
  7153     bind(COMPARE_WIDE_VECTORS);
  6868     movdqu(vec1, Address(str1, result, scale));
  7154     if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
  6869     pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
  7155       movdqu(vec1, Address(str1, result, scale));
       
  7156       pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
       
  7157     } else {
       
  7158       pmovzxbw(vec1, Address(str1, result, scale1));
       
  7159       pcmpestri(vec1, Address(str2, result, scale2), pcmpmask);
       
  7160     }
  6870     // After pcmpestri cnt1(rcx) contains mismatched element index
  7161     // After pcmpestri cnt1(rcx) contains mismatched element index
  6871 
  7162 
  6872     jccb(Assembler::below, VECTOR_NOT_EQUAL);  // CF==1
  7163     jccb(Assembler::below, VECTOR_NOT_EQUAL);  // CF==1
  6873     addptr(result, stride);
  7164     addptr(result, stride);
  6874     subptr(cnt2, stride);
  7165     subptr(cnt2, stride);
  6879     jccb(Assembler::zero, LENGTH_DIFF_LABEL);
  7170     jccb(Assembler::zero, LENGTH_DIFF_LABEL);
  6880 
  7171 
  6881     movl(cnt2, stride);
  7172     movl(cnt2, stride);
  6882     movl(result, stride);
  7173     movl(result, stride);
  6883     negptr(result);
  7174     negptr(result);
  6884     movdqu(vec1, Address(str1, result, scale));
  7175     if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
  6885     pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
  7176       movdqu(vec1, Address(str1, result, scale));
       
  7177       pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
       
  7178     } else {
       
  7179       pmovzxbw(vec1, Address(str1, result, scale1));
       
  7180       pcmpestri(vec1, Address(str2, result, scale2), pcmpmask);
       
  7181     }
  6886     jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL);
  7182     jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL);
  6887 
  7183 
  6888     // Mismatched characters in the vectors
  7184     // Mismatched characters in the vectors
  6889     bind(VECTOR_NOT_EQUAL);
  7185     bind(VECTOR_NOT_EQUAL);
  6890     addptr(cnt1, result);
  7186     addptr(cnt1, result);
  6891     load_unsigned_short(result, Address(str1, cnt1, scale));
  7187     load_next_elements(result, cnt2, str1, str2, scale, scale1, scale2, cnt1, ae);
  6892     load_unsigned_short(cnt2, Address(str2, cnt1, scale));
       
  6893     subl(result, cnt2);
  7188     subl(result, cnt2);
  6894     jmpb(POP_LABEL);
  7189     jmpb(POP_LABEL);
  6895 
  7190 
  6896     bind(COMPARE_TAIL); // limit is zero
  7191     bind(COMPARE_TAIL); // limit is zero
  6897     movl(cnt2, result);
  7192     movl(cnt2, result);
  6898     // Fallthru to tail compare
  7193     // Fallthru to tail compare
  6899   }
  7194   }
  6900   // Shift str2 and str1 to the end of the arrays, negate min
  7195   // Shift str2 and str1 to the end of the arrays, negate min
  6901   lea(str1, Address(str1, cnt2, scale));
  7196   if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
  6902   lea(str2, Address(str2, cnt2, scale));
  7197     lea(str1, Address(str1, cnt2, scale));
       
  7198     lea(str2, Address(str2, cnt2, scale));
       
  7199   } else {
       
  7200     lea(str1, Address(str1, cnt2, scale1));
       
  7201     lea(str2, Address(str2, cnt2, scale2));
       
  7202   }
  6903   decrementl(cnt2);  // first character was compared already
  7203   decrementl(cnt2);  // first character was compared already
  6904   negptr(cnt2);
  7204   negptr(cnt2);
  6905 
  7205 
  6906   // Compare the rest of the elements
  7206   // Compare the rest of the elements
  6907   bind(WHILE_HEAD_LABEL);
  7207   bind(WHILE_HEAD_LABEL);
  6908   load_unsigned_short(result, Address(str1, cnt2, scale, 0));
  7208   load_next_elements(result, cnt1, str1, str2, scale, scale1, scale2, cnt2, ae);
  6909   load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0));
       
  6910   subl(result, cnt1);
  7209   subl(result, cnt1);
  6911   jccb(Assembler::notZero, POP_LABEL);
  7210   jccb(Assembler::notZero, POP_LABEL);
  6912   increment(cnt2);
  7211   increment(cnt2);
  6913   jccb(Assembler::notZero, WHILE_HEAD_LABEL);
  7212   jccb(Assembler::notZero, WHILE_HEAD_LABEL);
  6914 
  7213 
  6915   // Strings are equal up to min length.  Return the length difference.
  7214   // Strings are equal up to min length.  Return the length difference.
  6916   bind(LENGTH_DIFF_LABEL);
  7215   bind(LENGTH_DIFF_LABEL);
  6917   pop(result);
  7216   pop(result);
       
  7217   if (ae == StrIntrinsicNode::UU) {
       
  7218     // Divide diff by 2 to get number of chars
       
  7219     sarl(result, 1);
       
  7220   }
  6918   jmpb(DONE_LABEL);
  7221   jmpb(DONE_LABEL);
  6919 
  7222 
  6920   // Discard the stored length difference
  7223   // Discard the stored length difference
  6921   bind(POP_LABEL);
  7224   bind(POP_LABEL);
  6922   pop(cnt1);
  7225   pop(cnt1);
  6923 
  7226 
  6924   // That's it
  7227   // That's it
  6925   bind(DONE_LABEL);
  7228   bind(DONE_LABEL);
  6926 }
  7229   if(ae == StrIntrinsicNode::UL) {
  6927 
  7230     negl(result);
  6928 // Compare char[] arrays aligned to 4 bytes or substrings.
  7231   }
  6929 void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
  7232 }
  6930                                         Register limit, Register result, Register chr,
  7233 
  6931                                         XMMRegister vec1, XMMRegister vec2) {
  7234 // Search for Non-ASCII character (Negative byte value) in a byte array,
       
  7235 // return true if it has any and false otherwise.
       // On exit result holds 1 when some byte has its top bit (0x80) set and 0
       // otherwise (an empty array yields 0). ary1 and len are overwritten while
       // scanning; tmp1, vec1 and vec2 are scratch, and vec1/vec2 are zeroed on
       // exit when UseAVX >= 2.
       
  7236 void MacroAssembler::has_negatives(Register ary1, Register len,
       
  7237                                    Register result, Register tmp1,
       
  7238                                    XMMRegister vec1, XMMRegister vec2) {
       
  7239 
       
  7240   // rsi: byte array
       
  7241   // rcx: len
       
  7242   // rax: result
  6932   ShortBranchVerifier sbv(this);
  7243   ShortBranchVerifier sbv(this);
  6933   Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;
  7244   assert_different_registers(ary1, len, result, tmp1);
       
  7245   assert_different_registers(vec1, vec2);
       
  7246   Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_CHAR, COMPARE_VECTORS, COMPARE_BYTE;
       
  7247 
       
  7248   // len == 0: nothing to scan, so the answer is false
       
  7249   testl(len, len);
       
  7250   jcc(Assembler::zero, FALSE_LABEL);
       
  7251 
       
  7252   movl(result, len); // copy; its low bits give the tail size later on
       
  7253 
       
  7254   if (UseAVX >= 2) {
       
  7255     // With AVX2, use 32-byte vector compare
       
  7256     Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
       
  7257 
       
  7258     // Compare 32-byte vectors
       
  7259     andl(result, 0x0000001f);  //   tail count (in bytes)
       
  7260     andl(len, 0xffffffe0);   // vector count (in bytes)
       
  7261     jccb(Assembler::zero, COMPARE_TAIL);
       
  7262 
           // Point ary1 just past the vector part and let len run from
           // -(vector count) up to 0, so the loop ends when len reaches zero.
       
  7263     lea(ary1, Address(ary1, len, Address::times_1));
       
  7264     negptr(len);
       
  7265 
       
  7266     movl(tmp1, 0x80808080);   // create mask to test for negative bytes (top bit set) in vector
       
  7267     movdl(vec2, tmp1);
       
  7268     vpbroadcastd(vec2, vec2);
       
  7269 
       
  7270     bind(COMPARE_WIDE_VECTORS);
       
  7271     vmovdqu(vec1, Address(ary1, len, Address::times_1));
       
  7272     vptest(vec1, vec2);       // ZF is cleared when (vec1 & mask) != 0
       
  7273     jccb(Assembler::notZero, TRUE_LABEL);
       
  7274     addptr(len, 32);
       
  7275     jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
       
  7276 
       
  7277     testl(result, result);
       
  7278     jccb(Assembler::zero, FALSE_LABEL);
       
  7279 
           // Tail: test the 32 bytes that end at the last array byte. This
           // window overlaps bytes the loop above has already checked.
       
  7280     vmovdqu(vec1, Address(ary1, result, Address::times_1, -32));
       
  7281     vptest(vec1, vec2);
       
  7282     jccb(Assembler::notZero, TRUE_LABEL);
       
  7283     jmpb(FALSE_LABEL);
       
  7284 
       
  7285     bind(COMPARE_TAIL); // len is zero
       
  7286     movl(len, result);
       
  7287     // Fallthru to tail compare
       
  7288   } else if (UseSSE42Intrinsics) {
       
  7289     // With SSE4.2, use double quad vector compare
       
  7290     Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
       
  7291 
       
  7292     // Compare 16-byte vectors
       
  7293     andl(result, 0x0000000f);  //   tail count (in bytes)
       
  7294     andl(len, 0xfffffff0);   // vector count (in bytes)
       
  7295     jccb(Assembler::zero, COMPARE_TAIL);
       
  7296 
           // Same negative-index scheme as the AVX2 path, in 16-byte steps.
       
  7297     lea(ary1, Address(ary1, len, Address::times_1));
       
  7298     negptr(len);
       
  7299 
       
  7300     movl(tmp1, 0x80808080);   // mask selecting the top bit of every byte
       
  7301     movdl(vec2, tmp1);
       
  7302     pshufd(vec2, vec2, 0);
       
  7303 
       
  7304     bind(COMPARE_WIDE_VECTORS);
       
  7305     movdqu(vec1, Address(ary1, len, Address::times_1));
       
  7306     ptest(vec1, vec2);
       
  7307     jccb(Assembler::notZero, TRUE_LABEL);
       
  7308     addptr(len, 16);
       
  7309     jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
       
  7310 
       
  7311     testl(result, result);
       
  7312     jccb(Assembler::zero, FALSE_LABEL);
       
  7313 
           // Tail: test the 16 bytes that end at the last array byte
           // (overlaps bytes already checked by the loop).
       
  7314     movdqu(vec1, Address(ary1, result, Address::times_1, -16));
       
  7315     ptest(vec1, vec2);
       
  7316     jccb(Assembler::notZero, TRUE_LABEL);
       
  7317     jmpb(FALSE_LABEL);
       
  7318 
       
  7319     bind(COMPARE_TAIL); // len is zero
       
  7320     movl(len, result);
       
  7321     // Fallthru to tail compare
       
  7322   }
       
  7323 
       
  7324   // Compare 4-byte vectors
       
  7325   andl(len, 0xfffffffc); // vector count (in bytes)
       
  7326   jccb(Assembler::zero, COMPARE_CHAR);
       
  7327 
       
  7328   lea(ary1, Address(ary1, len, Address::times_1));
       
  7329   negptr(len);
       
  7330 
       
  7331   bind(COMPARE_VECTORS);
       
  7332   movl(tmp1, Address(ary1, len, Address::times_1));
       
  7333   andl(tmp1, 0x80808080);   // keep only the top bit of each of the 4 bytes
       
  7334   jccb(Assembler::notZero, TRUE_LABEL);
       
  7335   addptr(len, 4);
       
  7336   jcc(Assembler::notZero, COMPARE_VECTORS);
       
  7337 
       
  7338   // Compare trailing char (final 2 bytes), if any
       
  7339   bind(COMPARE_CHAR);
       
  7340   testl(result, 0x2);   // tail  char
       
  7341   jccb(Assembler::zero, COMPARE_BYTE);
       
  7342   load_unsigned_short(tmp1, Address(ary1, 0));
       
  7343   andl(tmp1, 0x00008080);
       
  7344   jccb(Assembler::notZero, TRUE_LABEL);
       
  7345   subptr(result, 2);
       
  7346   lea(ary1, Address(ary1, 2));
       
  7347 
       
  7348   bind(COMPARE_BYTE);
       
  7349   testl(result, 0x1);   // tail  byte
       
  7350   jccb(Assembler::zero, FALSE_LABEL);
       
  7351   load_unsigned_byte(tmp1, Address(ary1, 0));
       
  7352   andl(tmp1, 0x00000080);
       
         // NOTE(review): notEqual is presumably the same condition code as the
         // notZero used by the earlier tests - confirm against Assembler::Condition.
       
  7353   jccb(Assembler::notEqual, TRUE_LABEL);
       
  7354   jmpb(FALSE_LABEL);
       
  7355 
       
  7356   bind(TRUE_LABEL);
       
  7357   movl(result, 1);   // return true
       
  7358   jmpb(DONE);
       
  7359 
       
  7360   bind(FALSE_LABEL);
       
  7361   xorl(result, result); // return false
       
  7362 
       
  7363   // That's it
       
  7364   bind(DONE);
       
  7365   if (UseAVX >= 2) {
       
  7366     // clean upper bits of YMM registers
       
  7367     vpxor(vec1, vec1);
       
  7368     vpxor(vec2, vec2);
       
  7369   }
       
  7370 }
       
  7371 
       
  7372 // Compare char[] or byte[] arrays aligned to 4 bytes or substrings.
       
  7373 void MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register ary2,
       
  7374                                    Register limit, Register result, Register chr,
       
  7375                                    XMMRegister vec1, XMMRegister vec2, bool is_char) {
       
  7376   ShortBranchVerifier sbv(this);
       
  7377   Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR, COMPARE_BYTE;
  6934 
  7378 
  6935   int length_offset  = arrayOopDesc::length_offset_in_bytes();
  7379   int length_offset  = arrayOopDesc::length_offset_in_bytes();
  6936   int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);
  7380   int base_offset    = arrayOopDesc::base_offset_in_bytes(is_char ? T_CHAR : T_BYTE);
  6937 
       
  6938   // Check the input args
       
  6939   cmpptr(ary1, ary2);
       
  6940   jcc(Assembler::equal, TRUE_LABEL);
       
  6941 
  7381 
  6942   if (is_array_equ) {
  7382   if (is_array_equ) {
       
  7383     // Check the input args
       
  7384     cmpptr(ary1, ary2);
       
  7385     jcc(Assembler::equal, TRUE_LABEL);
       
  7386 
  6943     // Need additional checks for arrays_equals.
  7387     // Need additional checks for arrays_equals.
  6944     testptr(ary1, ary1);
  7388     testptr(ary1, ary1);
  6945     jcc(Assembler::zero, FALSE_LABEL);
  7389     jcc(Assembler::zero, FALSE_LABEL);
  6946     testptr(ary2, ary2);
  7390     testptr(ary2, ary2);
  6947     jcc(Assembler::zero, FALSE_LABEL);
  7391     jcc(Assembler::zero, FALSE_LABEL);
  6960     // Load array address
  7404     // Load array address
  6961     lea(ary1, Address(ary1, base_offset));
  7405     lea(ary1, Address(ary1, base_offset));
  6962     lea(ary2, Address(ary2, base_offset));
  7406     lea(ary2, Address(ary2, base_offset));
  6963   }
  7407   }
  6964 
  7408 
  6965   shll(limit, 1);      // byte count != 0
  7409   if (is_array_equ && is_char) {
       
  7410     // arrays_equals when used for char[].
       
  7411     shll(limit, 1);      // byte count != 0
       
  7412   }
  6966   movl(result, limit); // copy
  7413   movl(result, limit); // copy
  6967 
  7414 
  6968   if (UseAVX >= 2) {
  7415   if (UseAVX >= 2) {
  6969     // With AVX2, use 32-byte vector compare
  7416     // With AVX2, use 32-byte vector compare
  6970     Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
  7417     Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
  6971 
  7418 
  6972     // Compare 32-byte vectors
  7419     // Compare 32-byte vectors
  6973     andl(result, 0x0000001e);  //   tail count (in bytes)
  7420     andl(result, 0x0000001f);  //   tail count (in bytes)
  6974     andl(limit, 0xffffffe0);   // vector count (in bytes)
  7421     andl(limit, 0xffffffe0);   // vector count (in bytes)
  6975     jccb(Assembler::zero, COMPARE_TAIL);
  7422     jccb(Assembler::zero, COMPARE_TAIL);
  6976 
  7423 
  6977     lea(ary1, Address(ary1, limit, Address::times_1));
  7424     lea(ary1, Address(ary1, limit, Address::times_1));
  6978     lea(ary2, Address(ary2, limit, Address::times_1));
  7425     lea(ary2, Address(ary2, limit, Address::times_1));
  7005   } else if (UseSSE42Intrinsics) {
  7452   } else if (UseSSE42Intrinsics) {
  7006     // With SSE4.2, use double quad vector compare
  7453     // With SSE4.2, use double quad vector compare
  7007     Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
  7454     Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
  7008 
  7455 
  7009     // Compare 16-byte vectors
  7456     // Compare 16-byte vectors
  7010     andl(result, 0x0000000e);  //   tail count (in bytes)
  7457     andl(result, 0x0000000f);  //   tail count (in bytes)
  7011     andl(limit, 0xfffffff0);   // vector count (in bytes)
  7458     andl(limit, 0xfffffff0);   // vector count (in bytes)
  7012     jccb(Assembler::zero, COMPARE_TAIL);
  7459     jccb(Assembler::zero, COMPARE_TAIL);
  7013 
  7460 
  7014     lea(ary1, Address(ary1, limit, Address::times_1));
  7461     lea(ary1, Address(ary1, limit, Address::times_1));
  7015     lea(ary2, Address(ary2, limit, Address::times_1));
  7462     lea(ary2, Address(ary2, limit, Address::times_1));
  7057   jcc(Assembler::notZero, COMPARE_VECTORS);
  7504   jcc(Assembler::notZero, COMPARE_VECTORS);
  7058 
  7505 
  7059   // Compare trailing char (final 2 bytes), if any
  7506   // Compare trailing char (final 2 bytes), if any
  7060   bind(COMPARE_CHAR);
  7507   bind(COMPARE_CHAR);
  7061   testl(result, 0x2);   // tail  char
  7508   testl(result, 0x2);   // tail  char
  7062   jccb(Assembler::zero, TRUE_LABEL);
  7509   jccb(Assembler::zero, COMPARE_BYTE);
  7063   load_unsigned_short(chr, Address(ary1, 0));
  7510   load_unsigned_short(chr, Address(ary1, 0));
  7064   load_unsigned_short(limit, Address(ary2, 0));
  7511   load_unsigned_short(limit, Address(ary2, 0));
  7065   cmpl(chr, limit);
  7512   cmpl(chr, limit);
  7066   jccb(Assembler::notEqual, FALSE_LABEL);
  7513   jccb(Assembler::notEqual, FALSE_LABEL);
  7067 
  7514 
       
  7515   if (is_array_equ && is_char) {
       
  7516     bind(COMPARE_BYTE);
       
  7517   } else {
       
  7518     lea(ary1, Address(ary1, 2));
       
  7519     lea(ary2, Address(ary2, 2));
       
  7520 
       
  7521     bind(COMPARE_BYTE);
       
  7522     testl(result, 0x1);   // tail  byte
       
  7523     jccb(Assembler::zero, TRUE_LABEL);
       
  7524     load_unsigned_byte(chr, Address(ary1, 0));
       
  7525     load_unsigned_byte(limit, Address(ary2, 0));
       
  7526     cmpl(chr, limit);
       
  7527     jccb(Assembler::notEqual, FALSE_LABEL);
       
  7528   }
  7068   bind(TRUE_LABEL);
  7529   bind(TRUE_LABEL);
  7069   movl(result, 1);   // return true
  7530   movl(result, 1);   // return true
  7070   jmpb(DONE);
  7531   jmpb(DONE);
  7071 
  7532 
  7072   bind(FALSE_LABEL);
  7533   bind(FALSE_LABEL);
  7078     // clean upper bits of YMM registers
  7539     // clean upper bits of YMM registers
  7079     vpxor(vec1, vec1);
  7540     vpxor(vec1, vec1);
  7080     vpxor(vec2, vec2);
  7541     vpxor(vec2, vec2);
  7081   }
  7542   }
  7082 }
  7543 }
       
  7544 
       
  7545 #endif
  7083 
  7546 
  7084 void MacroAssembler::generate_fill(BasicType t, bool aligned,
  7547 void MacroAssembler::generate_fill(BasicType t, bool aligned,
  7085                                    Register to, Register value, Register count,
  7548                                    Register to, Register value, Register count,
  7086                                    Register rtmp, XMMRegister xtmp) {
  7549                                    Register rtmp, XMMRegister xtmp) {
  7087   ShortBranchVerifier sbv(this);
  7550   ShortBranchVerifier sbv(this);
  9083 #endif // LP64
  9546 #endif // LP64
  9084 #undef BIND
  9547 #undef BIND
  9085 #undef BLOCK_COMMENT
  9548 #undef BLOCK_COMMENT
  9086 
  9549 
  9087 
  9550 
       
  9551 // Compress char[] array to byte[].
       // Each 16-bit char whose high byte is zero is stored as a single byte in dst.
       // On exit result holds the original len when every char was compressed, or
       // 0 as soon as a char with a non-zero high byte is seen (dst may then be
       // partially written). src, dst, len and tmp5 are overwritten; the XMM
       // temporaries are only used when UseSSE42Intrinsics is set.
       
  9552 void MacroAssembler::char_array_compress(Register src, Register dst, Register len,
       
  9553                                          XMMRegister tmp1Reg, XMMRegister tmp2Reg,
       
  9554                                          XMMRegister tmp3Reg, XMMRegister tmp4Reg,
       
  9555                                          Register tmp5, Register result) {
       
  9556   Label copy_chars_loop, return_length, return_zero, done;
       
  9557 
       
  9558   // rsi: src
       
  9559   // rdi: dst
       
  9560   // rdx: len
       
  9561   // rcx: tmp5
       
  9562   // rax: result
       
  9563 
       
  9564   // rsi holds start addr of source char[] to be compressed
       
  9565   // rdi holds start addr of destination byte[]
       
  9566   // rdx holds length
       
  9567 
       
  9568   assert(len != result, "");
       
  9569 
       
  9570   // save length for return; every exit path must rebalance the stack
       
  9571   push(len);
       
  9572 
       
  9573   if (UseSSE42Intrinsics) {
       
  9574     Label copy_32_loop, copy_16, copy_tail;
       
  9575 
       
  9576     movl(result, len);
       
  9577     movl(tmp5, 0xff00ff00);   // mask selecting the high byte of each char (non-zero => not LATIN1)
       
  9578 
       
  9579     // vectored compression
       
  9580     andl(len, 0xfffffff0);    // vector count (in chars)
       
  9581     andl(result, 0x0000000f);    // tail count (in chars)
       
  9582     testl(len, len);
       
  9583     jccb(Assembler::zero, copy_16);
       
  9584 
       
  9585     // compress 16 chars per iter
       
  9586     movdl(tmp1Reg, tmp5);
       
  9587     pshufd(tmp1Reg, tmp1Reg, 0);   // store Unicode mask in tmp1Reg
       
  9588     pxor(tmp4Reg, tmp4Reg);        // tmp4Reg accumulates the OR of every vector loaded so far
       
  9589 
           // Point src/dst past the vector part and let len run from
           // -(vector count) up to 0.
       
  9590     lea(src, Address(src, len, Address::times_2));
       
  9591     lea(dst, Address(dst, len, Address::times_1));
       
  9592     negptr(len);
       
  9593 
       
  9594     bind(copy_32_loop);
       
  9595     movdqu(tmp2Reg, Address(src, len, Address::times_2));     // load 1st 8 characters
       
  9596     por(tmp4Reg, tmp2Reg);
       
  9597     movdqu(tmp3Reg, Address(src, len, Address::times_2, 16)); // load next 8 characters
       
  9598     por(tmp4Reg, tmp3Reg);
       
  9599     ptest(tmp4Reg, tmp1Reg);       // check for non-zero high bytes in the chars seen so far
       
  9600     jcc(Assembler::notZero, return_zero);
       
  9601     packuswb(tmp2Reg, tmp3Reg);    // only LATIN1 chars; compress each to 1 byte
       
  9602     movdqu(Address(dst, len, Address::times_1), tmp2Reg);
       
  9603     addptr(len, 16);
       
  9604     jcc(Assembler::notZero, copy_32_loop);
       
  9605 
       
  9606     // compress next vector of 8 chars (if any)
       
  9607     bind(copy_16);
       
  9608     movl(len, result);
       
  9609     andl(len, 0xfffffff8);    // vector count (in chars)
       
  9610     andl(result, 0x00000007);    // tail count (in chars)
       
  9611     testl(len, len);
       
  9612     jccb(Assembler::zero, copy_tail);
       
  9613 
       
  9614     movdl(tmp1Reg, tmp5);
       
  9615     pshufd(tmp1Reg, tmp1Reg, 0);   // store Unicode mask in tmp1Reg
       
  9616     pxor(tmp3Reg, tmp3Reg);        // zero second pack operand; only the low 8 result bytes are stored
       
  9617 
       
  9618     movdqu(tmp2Reg, Address(src, 0));
       
  9619     ptest(tmp2Reg, tmp1Reg);       // check for Unicode chars in vector
       
  9620     jccb(Assembler::notZero, return_zero);
       
  9621     packuswb(tmp2Reg, tmp3Reg);    // only LATIN1 chars; compress each to 1 byte
       
  9622     movq(Address(dst, 0), tmp2Reg);
       
  9623     addptr(src, 16);
       
  9624     addptr(dst, 8);
       
  9625 
       
  9626     bind(copy_tail);
       
  9627     movl(len, result);
       
  9628   }
       
  9629   // compress 1 char per iter
       
  9630   testl(len, len);
       
  9631   jccb(Assembler::zero, return_length);
       
  9632   lea(src, Address(src, len, Address::times_2));
       
  9633   lea(dst, Address(dst, len, Address::times_1));
       
  9634   negptr(len);
       
  9635 
       
  9636   bind(copy_chars_loop);
       
  9637   load_unsigned_short(result, Address(src, len, Address::times_2));
       
  9638   testl(result, 0xff00);      // check for a non-zero high byte (not LATIN1)
       
  9639   jccb(Assembler::notZero, return_zero);
       
  9640   movb(Address(dst, len, Address::times_1), result);  // LATIN1 char; compress to 1 byte
       
  9641   increment(len);
       
  9642   jcc(Assembler::notZero, copy_chars_loop);
       
  9643 
       
  9644   // if compression succeeded, return length
       
  9645   bind(return_length);
       
  9646   pop(result);                // the length pushed on entry
       
  9647   jmpb(done);
       
  9648 
       
  9649   // if compression failed, return 0
       
  9650   bind(return_zero);
       
  9651   xorl(result, result);
       
  9652   addptr(rsp, wordSize);      // discard the length pushed on entry
       
  9653 
       
  9654   bind(done);
       
  9655 }
       
  9656 
       
  9657 // Inflate byte[] array to char[].
       // Each source byte is zero-extended to a 16-bit char and stored to dst.
       // With UseSSE42Intrinsics the work is done in three stages: 8 bytes per
       // iteration, then one optional group of 4, then byte by byte. src, dst,
       // len and tmp2 are overwritten; tmp1 is only used on the SSE4.2 path.
       
  9658 void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
       
  9659                                         XMMRegister tmp1, Register tmp2) {
       
  9660   Label copy_chars_loop, done;
       
  9661 
       
  9662   // rsi: src
       
  9663   // rdi: dst
       
  9664   // rdx: len
       
  9665   // rcx: tmp2
       
  9666 
       
  9667   // rsi holds start addr of source byte[] to be inflated
       
  9668   // rdi holds start addr of destination char[]
       
  9669   // rdx holds length
       
  9670   assert_different_registers(src, dst, len, tmp2);
       
  9671 
       
  9672   if (UseSSE42Intrinsics) {
       
  9673     Label copy_8_loop, copy_bytes, copy_tail;
       
  9674 
       
  9675     movl(tmp2, len);
       
  9676     andl(tmp2, 0x00000007);   // tail count (in chars)
       
  9677     andl(len, 0xfffffff8);    // vector count (in chars)
       
  9678     jccb(Assembler::zero, copy_tail);
       
  9679 
       
  9680     // vectored inflation
           // Point src/dst past the vector part and let len run from
           // -(vector count) up to 0.
       
  9681     lea(src, Address(src, len, Address::times_1));
       
  9682     lea(dst, Address(dst, len, Address::times_2));
       
  9683     negptr(len);
       
  9684 
       
  9685     // inflate 8 chars per iter
       
  9686     bind(copy_8_loop);
       
  9687     pmovzxbw(tmp1, Address(src, len, Address::times_1));  // unpack to 8 words
       
  9688     movdqu(Address(dst, len, Address::times_2), tmp1);
       
  9689     addptr(len, 8);
       
  9690     jcc(Assembler::notZero, copy_8_loop);
       
  9691 
       
  9692     bind(copy_tail);
       
  9693     movl(len, tmp2);
       
  9694 
           // 4..7 bytes left: inflate 4 of them in one step, else go byte by byte
       
  9695     cmpl(len, 4);
       
  9696     jccb(Assembler::less, copy_bytes);
       
  9697 
       
  9698     movdl(tmp1, Address(src, 0));  // load 4 byte chars
       
  9699     pmovzxbw(tmp1, tmp1);
       
  9700     movq(Address(dst, 0), tmp1);   // store the 4 resulting chars (8 bytes)
       
  9701     subptr(len, 4);
       
  9702     addptr(src, 4);
       
  9703     addptr(dst, 8);
       
  9704 
       
  9705     bind(copy_bytes);
       
  9706   }
       
  9707   testl(len, len);
       
  9708   jccb(Assembler::zero, done);
       
  9709   lea(src, Address(src, len, Address::times_1));
       
  9710   lea(dst, Address(dst, len, Address::times_2));
       
  9711   negptr(len);
       
  9712 
       
  9713   // inflate 1 char per iter
       
  9714   bind(copy_chars_loop);
       
  9715   load_unsigned_byte(tmp2, Address(src, len, Address::times_1));  // load byte char
       
  9716   movw(Address(dst, len, Address::times_2), tmp2);  // inflate byte char to word
       
  9717   increment(len);
       
  9718   jcc(Assembler::notZero, copy_chars_loop);
       
  9719 
       
  9720   bind(done);
       
  9721 }
       
  9722 
       
  9723 
  9088 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
  9724 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
  9089   switch (cond) {
  9725   switch (cond) {
  9090     // Note some conditions are synonyms for others
  9726     // Note some conditions are synonyms for others
  9091     case Assembler::zero:         return Assembler::notZero;
  9727     case Assembler::zero:         return Assembler::notZero;
  9092     case Assembler::notZero:      return Assembler::zero;
  9728     case Assembler::notZero:      return Assembler::zero;