6827605: new String intrinsics may prevent EA scalar replacement
authorkvn
Mon, 14 Sep 2009 12:14:20 -0700
changeset 3905 7d725029ac85
parent 3904 007a45522a7f
child 3906 6767b0c66883
6827605: new String intrinsics may prevent EA scalar replacement 6875866: Intrinsic for String.indexOf() is broken on x86 with SSE4.2 Summary: Modify String intrinsic methods to pass char[] pointers instead of string oops. Reviewed-by: never
hotspot/src/cpu/sparc/vm/assembler_sparc.cpp
hotspot/src/cpu/sparc/vm/assembler_sparc.hpp
hotspot/src/cpu/sparc/vm/sparc.ad
hotspot/src/cpu/x86/vm/assembler_x86.cpp
hotspot/src/cpu/x86/vm/assembler_x86.hpp
hotspot/src/cpu/x86/vm/x86_32.ad
hotspot/src/cpu/x86/vm/x86_64.ad
hotspot/src/share/vm/adlc/formssel.cpp
hotspot/src/share/vm/opto/library_call.cpp
hotspot/src/share/vm/opto/matcher.cpp
hotspot/src/share/vm/opto/memnode.cpp
hotspot/src/share/vm/opto/memnode.hpp
hotspot/test/compiler/6875866/Test.java
--- a/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp	Mon Sep 14 09:49:54 2009 -0700
+++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp	Mon Sep 14 12:14:20 2009 -0700
@@ -4676,3 +4676,50 @@
     load_ptr_contents(base, G6_heapbase);
   }
 }
+
+// Compare char[] arrays aligned to 4 bytes.
+void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
+                                        Register limit, Register result,
+                                        Register chr1, Register chr2, Label& Ldone) {
+  Label Lvector, Lloop;
+  assert(chr1 == result, "should be the same");
+
+  // Note: limit contains number of bytes (2*char_elements) != 0.
+  andcc(limit, 0x2, chr1); // trailing character ?
+  br(Assembler::zero, false, Assembler::pt, Lvector);
+  delayed()->nop();
+
+  // compare the trailing char
+  sub(limit, sizeof(jchar), limit);
+  lduh(ary1, limit, chr1);
+  lduh(ary2, limit, chr2);
+  cmp(chr1, chr2);
+  br(Assembler::notEqual, true, Assembler::pt, Ldone);
+  delayed()->mov(G0, result);     // not equal
+
+  // only one char ?
+  br_on_reg_cond(rc_z, true, Assembler::pn, limit, Ldone);
+  delayed()->add(G0, 1, result); // zero-length arrays are equal
+
+  // word by word compare, dont't need alignment check
+  bind(Lvector);
+  // Shift ary1 and ary2 to the end of the arrays, negate limit
+  add(ary1, limit, ary1);
+  add(ary2, limit, ary2);
+  neg(limit, limit);
+
+  lduw(ary1, limit, chr1);
+  bind(Lloop);
+  lduw(ary2, limit, chr2);
+  cmp(chr1, chr2);
+  br(Assembler::notEqual, true, Assembler::pt, Ldone);
+  delayed()->mov(G0, result);     // not equal
+  inccc(limit, 2*sizeof(jchar));
+  // annul LDUW if branch is not taken to prevent access past end of array
+  br(Assembler::notZero, true, Assembler::pt, Lloop);
+  delayed()->lduw(ary1, limit, chr1); // hoisted
+
+  // Caller should set it:
+  // add(G0, 1, result); // equals
+}
+
--- a/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp	Mon Sep 14 09:49:54 2009 -0700
+++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp	Mon Sep 14 12:14:20 2009 -0700
@@ -2455,6 +2455,11 @@
   void inc_counter(address counter_addr, Register Rtmp1, Register Rtmp2);
   void inc_counter(int*    counter_addr, Register Rtmp1, Register Rtmp2);
 
+  // Compare char[] arrays aligned to 4 bytes.
+  void char_arrays_equals(Register ary1, Register ary2,
+                          Register limit, Register result,
+                          Register chr1, Register chr2, Label& Ldone);
+
 #undef VIRTUAL
 
 };
--- a/hotspot/src/cpu/sparc/vm/sparc.ad	Mon Sep 14 09:49:54 2009 -0700
+++ b/hotspot/src/cpu/sparc/vm/sparc.ad	Mon Sep 14 12:14:20 2009 -0700
@@ -2838,63 +2838,41 @@
   %}
 
 
-  enc_class enc_String_Compare(o0RegP str1, o1RegP str2, g3RegP tmp1, g4RegP tmp2, notemp_iRegI result) %{
+  enc_class enc_String_Compare(o0RegP str1, o1RegP str2, g3RegI cnt1, g4RegI cnt2, notemp_iRegI result) %{
     Label Ldone, Lloop;
     MacroAssembler _masm(&cbuf);
 
     Register   str1_reg = reg_to_register_object($str1$$reg);
     Register   str2_reg = reg_to_register_object($str2$$reg);
-    Register   tmp1_reg = reg_to_register_object($tmp1$$reg);
-    Register   tmp2_reg = reg_to_register_object($tmp2$$reg);
+    Register   cnt1_reg = reg_to_register_object($cnt1$$reg);
+    Register   cnt2_reg = reg_to_register_object($cnt2$$reg);
     Register result_reg = reg_to_register_object($result$$reg);
 
-    // Get the first character position in both strings
-    //         [8] char array, [12] offset, [16] count
-    int  value_offset = java_lang_String:: value_offset_in_bytes();
-    int offset_offset = java_lang_String::offset_offset_in_bytes();
-    int  count_offset = java_lang_String:: count_offset_in_bytes();
-
-    // load str1 (jchar*) base address into tmp1_reg
-    __ load_heap_oop(str1_reg, value_offset, tmp1_reg);
-    __ ld(str1_reg, offset_offset, result_reg);
-    __ add(tmp1_reg, arrayOopDesc::base_offset_in_bytes(T_CHAR), tmp1_reg);
-    __   ld(str1_reg, count_offset, str1_reg); // hoisted
-    __ sll(result_reg, exact_log2(sizeof(jchar)), result_reg);
-    __   load_heap_oop(str2_reg, value_offset, tmp2_reg); // hoisted
-    __ add(result_reg, tmp1_reg, tmp1_reg);
-
-    // load str2 (jchar*) base address into tmp2_reg
-    // __ ld_ptr(str2_reg, value_offset, tmp2_reg); // hoisted
-    __ ld(str2_reg, offset_offset, result_reg);
-    __ add(tmp2_reg, arrayOopDesc::base_offset_in_bytes(T_CHAR), tmp2_reg);
-    __   ld(str2_reg, count_offset, str2_reg); // hoisted
-    __ sll(result_reg, exact_log2(sizeof(jchar)), result_reg);
-    __   subcc(str1_reg, str2_reg, O7); // hoisted
-    __ add(result_reg, tmp2_reg, tmp2_reg);
+    assert(result_reg != str1_reg &&
+           result_reg != str2_reg &&
+           result_reg != cnt1_reg &&
+           result_reg != cnt2_reg ,
+           "need different registers");
 
     // Compute the minimum of the string lengths(str1_reg) and the
     // difference of the string lengths (stack)
 
-    // discard string base pointers, after loading up the lengths
-    // __ ld(str1_reg, count_offset, str1_reg); // hoisted
-    // __ ld(str2_reg, count_offset, str2_reg); // hoisted
-
     // See if the lengths are different, and calculate min in str1_reg.
     // Stash diff in O7 in case we need it for a tie-breaker.
     Label Lskip;
-    // __ subcc(str1_reg, str2_reg, O7); // hoisted
-    __ sll(str1_reg, exact_log2(sizeof(jchar)), str1_reg); // scale the limit
+    __ subcc(cnt1_reg, cnt2_reg, O7);
+    __ sll(cnt1_reg, exact_log2(sizeof(jchar)), cnt1_reg); // scale the limit
     __ br(Assembler::greater, true, Assembler::pt, Lskip);
-    // str2 is shorter, so use its count:
-    __ delayed()->sll(str2_reg, exact_log2(sizeof(jchar)), str1_reg); // scale the limit
+    // cnt2 is shorter, so use its count:
+    __ delayed()->sll(cnt2_reg, exact_log2(sizeof(jchar)), cnt1_reg); // scale the limit
     __ bind(Lskip);
 
-    // reallocate str1_reg, str2_reg, result_reg
+    // reallocate cnt1_reg, cnt2_reg, result_reg
     // Note:  limit_reg holds the string length pre-scaled by 2
-    Register limit_reg =   str1_reg;
-    Register  chr2_reg =   str2_reg;
+    Register limit_reg =   cnt1_reg;
+    Register  chr2_reg =   cnt2_reg;
     Register  chr1_reg = result_reg;
-    // tmp{12} are the base pointers
+    // str{12} are the base pointers
 
     // Is the minimum length zero?
     __ cmp(limit_reg, (int)(0 * sizeof(jchar))); // use cast to resolve overloading ambiguity
@@ -2902,8 +2880,8 @@
     __ delayed()->mov(O7, result_reg);  // result is difference in lengths
 
     // Load first characters
-    __ lduh(tmp1_reg, 0, chr1_reg);
-    __ lduh(tmp2_reg, 0, chr2_reg);
+    __ lduh(str1_reg, 0, chr1_reg);
+    __ lduh(str2_reg, 0, chr2_reg);
 
     // Compare first characters
     __ subcc(chr1_reg, chr2_reg, chr1_reg);
@@ -2915,7 +2893,7 @@
       // Check after comparing first character to see if strings are equivalent
       Label LSkip2;
       // Check if the strings start at same location
-      __ cmp(tmp1_reg, tmp2_reg);
+      __ cmp(str1_reg, str2_reg);
       __ brx(Assembler::notEqual, true, Assembler::pt, LSkip2);
       __ delayed()->nop();
 
@@ -2932,23 +2910,23 @@
     __ br(Assembler::equal, true, Assembler::pn, Ldone);
     __ delayed()->mov(O7, result_reg);  // result is difference in lengths
 
-    // Shift tmp1_reg and tmp2_reg to the end of the arrays, negate limit
-    __ add(tmp1_reg, limit_reg, tmp1_reg);
-    __ add(tmp2_reg, limit_reg, tmp2_reg);
+    // Shift str1_reg and str2_reg to the end of the arrays, negate limit
+    __ add(str1_reg, limit_reg, str1_reg);
+    __ add(str2_reg, limit_reg, str2_reg);
     __ neg(chr1_reg, limit_reg);  // limit = -(limit-2)
 
     // Compare the rest of the characters
-    __ lduh(tmp1_reg, limit_reg, chr1_reg);
+    __ lduh(str1_reg, limit_reg, chr1_reg);
     __ bind(Lloop);
-    // __ lduh(tmp1_reg, limit_reg, chr1_reg); // hoisted
-    __ lduh(tmp2_reg, limit_reg, chr2_reg);
+    // __ lduh(str1_reg, limit_reg, chr1_reg); // hoisted
+    __ lduh(str2_reg, limit_reg, chr2_reg);
     __ subcc(chr1_reg, chr2_reg, chr1_reg);
     __ br(Assembler::notZero, false, Assembler::pt, Ldone);
     assert(chr1_reg == result_reg, "result must be pre-placed");
     __ delayed()->inccc(limit_reg, sizeof(jchar));
     // annul LDUH if branch is not taken to prevent access past end of string
     __ br(Assembler::notZero, true, Assembler::pt, Lloop);
-    __ delayed()->lduh(tmp1_reg, limit_reg, chr1_reg); // hoisted
+    __ delayed()->lduh(str1_reg, limit_reg, chr1_reg); // hoisted
 
     // If strings are equal up to min length, return the length difference.
     __ mov(O7, result_reg);
@@ -2957,125 +2935,80 @@
     __ bind(Ldone);
   %}
 
-enc_class enc_String_Equals(o0RegP str1, o1RegP str2, g3RegP tmp1, g4RegP tmp2, notemp_iRegI result) %{
-    Label Lword, Lword_loop, Lpost_word, Lchar, Lchar_loop, Ldone;
+enc_class enc_String_Equals(o0RegP str1, o1RegP str2, g3RegI cnt, notemp_iRegI result) %{
+    Label Lword_loop, Lpost_word, Lchar, Lchar_loop, Ldone;
     MacroAssembler _masm(&cbuf);
 
     Register   str1_reg = reg_to_register_object($str1$$reg);
     Register   str2_reg = reg_to_register_object($str2$$reg);
-    Register   tmp1_reg = reg_to_register_object($tmp1$$reg);
-    Register   tmp2_reg = reg_to_register_object($tmp2$$reg);
+    Register    cnt_reg = reg_to_register_object($cnt$$reg);
+    Register   tmp1_reg = O7;
     Register result_reg = reg_to_register_object($result$$reg);
 
-    // Get the first character position in both strings
-    //         [8] char array, [12] offset, [16] count
-    int  value_offset = java_lang_String:: value_offset_in_bytes();
-    int offset_offset = java_lang_String::offset_offset_in_bytes();
-    int  count_offset = java_lang_String:: count_offset_in_bytes();
-
-    // load str1 (jchar*) base address into tmp1_reg
-    __ load_heap_oop(Address(str1_reg, value_offset), tmp1_reg);
-    __ ld(Address(str1_reg, offset_offset), result_reg);
-    __ add(tmp1_reg, arrayOopDesc::base_offset_in_bytes(T_CHAR), tmp1_reg);
-    __    ld(Address(str1_reg, count_offset), str1_reg); // hoisted
-    __ sll(result_reg, exact_log2(sizeof(jchar)), result_reg);
-    __    load_heap_oop(Address(str2_reg, value_offset), tmp2_reg); // hoisted
-    __ add(result_reg, tmp1_reg, tmp1_reg);
-
-    // load str2 (jchar*) base address into tmp2_reg
-    // __ ld_ptr(Address(str2_reg, value_offset), tmp2_reg); // hoisted
-    __ ld(Address(str2_reg, offset_offset), result_reg);
-    __ add(tmp2_reg, arrayOopDesc::base_offset_in_bytes(T_CHAR), tmp2_reg);
-    __    ld(Address(str2_reg, count_offset), str2_reg); // hoisted
-    __ sll(result_reg, exact_log2(sizeof(jchar)), result_reg);
-    __   cmp(str1_reg, str2_reg); // hoisted
-    __ add(result_reg, tmp2_reg, tmp2_reg);
-
-    __ sll(str1_reg, exact_log2(sizeof(jchar)), str1_reg);
-    __ br(Assembler::notEqual, true, Assembler::pt, Ldone);
-    __ delayed()->mov(G0, result_reg);    // not equal
-
-    __ br_zero(Assembler::equal, true, Assembler::pn, str1_reg, Ldone);
-    __ delayed()->add(G0, 1, result_reg); //equals
-
-    __ cmp(tmp1_reg, tmp2_reg); //same string ?
+    assert(result_reg != str1_reg &&
+           result_reg != str2_reg &&
+           result_reg !=  cnt_reg &&
+           result_reg != tmp1_reg ,
+           "need different registers");
+
+    __ cmp(str1_reg, str2_reg); //same char[] ?
     __ brx(Assembler::equal, true, Assembler::pn, Ldone);
     __ delayed()->add(G0, 1, result_reg);
 
+    __ br_on_reg_cond(Assembler::rc_z, true, Assembler::pn, cnt_reg, Ldone);
+    __ delayed()->add(G0, 1, result_reg); // count == 0
+
     //rename registers
-    Register limit_reg =   str1_reg;
-    Register  chr2_reg =   str2_reg;
+    Register limit_reg =    cnt_reg;
     Register  chr1_reg = result_reg;
-    // tmp{12} are the base pointers
+    Register  chr2_reg =   tmp1_reg;
 
     //check for alignment and position the pointers to the ends
-    __ or3(tmp1_reg, tmp2_reg, chr1_reg);
-    __ andcc(chr1_reg, 0x3, chr1_reg); // notZero means at least one not 4-byte aligned
-    __ br(Assembler::notZero, false, Assembler::pn, Lchar);
-    __ delayed()->nop();
-
-    __ bind(Lword);
-    __ and3(limit_reg, 0x2, O7); //remember the remainder (either 0 or 2)
-    __ andn(limit_reg, 0x3, limit_reg);
-    __ br_zero(Assembler::zero, false, Assembler::pn, limit_reg, Lpost_word);
-    __ delayed()->nop();
-
-    __ add(tmp1_reg, limit_reg, tmp1_reg);
-    __ add(tmp2_reg, limit_reg, tmp2_reg);
-    __ neg(limit_reg);
-
-    __ lduw(tmp1_reg, limit_reg, chr1_reg);
-    __ bind(Lword_loop);
-    __ lduw(tmp2_reg, limit_reg, chr2_reg);
-    __ cmp(chr1_reg, chr2_reg);
-    __ br(Assembler::notEqual, true, Assembler::pt, Ldone);
-    __ delayed()->mov(G0, result_reg);
-    __ inccc(limit_reg, 2*sizeof(jchar));
-    // annul LDUW if branch i  s not taken to prevent access past end of string
-    __ br(Assembler::notZero, true, Assembler::pt, Lword_loop); //annul on taken
-    __ delayed()->lduw(tmp1_reg, limit_reg, chr1_reg); // hoisted
-
-    __ bind(Lpost_word);
-    __ br_zero(Assembler::zero, true, Assembler::pt, O7, Ldone);
-    __ delayed()->add(G0, 1, result_reg);
-
-    __ lduh(tmp1_reg, 0, chr1_reg);
-    __ lduh(tmp2_reg, 0, chr2_reg);
-    __ cmp (chr1_reg, chr2_reg);
-    __ br(Assembler::notEqual, true, Assembler::pt, Ldone);
-    __ delayed()->mov(G0, result_reg);
+    __ or3(str1_reg, str2_reg, chr1_reg);
+    __ andcc(chr1_reg, 0x3, chr1_reg);
+    // notZero means at least one not 4-byte aligned.
+    // We could optimize the case when both arrays are not aligned
+    // but it is not frequent case and it requires additional checks.
+    __ br(Assembler::notZero, false, Assembler::pn, Lchar); // char by char compare
+    __ delayed()->sll(limit_reg, exact_log2(sizeof(jchar)), limit_reg); // set byte count
+
+    // Compare char[] arrays aligned to 4 bytes.
+    __ char_arrays_equals(str1_reg, str2_reg, limit_reg, result_reg,
+                          chr1_reg, chr2_reg, Ldone);
     __ ba(false,Ldone);
     __ delayed()->add(G0, 1, result_reg);
 
+    // char by char compare
     __ bind(Lchar);
-    __ add(tmp1_reg, limit_reg, tmp1_reg);
-    __ add(tmp2_reg, limit_reg, tmp2_reg);
+    __ add(str1_reg, limit_reg, str1_reg);
+    __ add(str2_reg, limit_reg, str2_reg);
     __ neg(limit_reg); //negate count
 
-    __ lduh(tmp1_reg, limit_reg, chr1_reg);
+    __ lduh(str1_reg, limit_reg, chr1_reg);
+    // Lchar_loop
     __ bind(Lchar_loop);
-    __ lduh(tmp2_reg, limit_reg, chr2_reg);
+    __ lduh(str2_reg, limit_reg, chr2_reg);
     __ cmp(chr1_reg, chr2_reg);
     __ br(Assembler::notEqual, true, Assembler::pt, Ldone);
     __ delayed()->mov(G0, result_reg); //not equal
     __ inccc(limit_reg, sizeof(jchar));
     // annul LDUH if branch is not taken to prevent access past end of string
-    __ br(Assembler::notZero, true, Assembler::pt, Lchar_loop); //annul on taken
-    __ delayed()->lduh(tmp1_reg, limit_reg, chr1_reg); // hoisted
+    __ br(Assembler::notZero, true, Assembler::pt, Lchar_loop);
+    __ delayed()->lduh(str1_reg, limit_reg, chr1_reg); // hoisted
 
     __ add(G0, 1, result_reg);  //equal
 
     __ bind(Ldone);
   %}
 
-enc_class enc_Array_Equals(o0RegP ary1, o1RegP ary2, g3RegP tmp1, g4RegP tmp2, notemp_iRegI result) %{
+enc_class enc_Array_Equals(o0RegP ary1, o1RegP ary2, g3RegP tmp1, notemp_iRegI result) %{
     Label Lvector, Ldone, Lloop;
     MacroAssembler _masm(&cbuf);
 
     Register   ary1_reg = reg_to_register_object($ary1$$reg);
     Register   ary2_reg = reg_to_register_object($ary2$$reg);
     Register   tmp1_reg = reg_to_register_object($tmp1$$reg);
-    Register   tmp2_reg = reg_to_register_object($tmp2$$reg);
+    Register   tmp2_reg = O7;
     Register result_reg = reg_to_register_object($result$$reg);
 
     int length_offset  = arrayOopDesc::length_offset_in_bytes();
@@ -3101,7 +3034,7 @@
     __ br(Assembler::notEqual, true, Assembler::pn, Ldone);
     __ delayed()->mov(G0, result_reg);     // not equal
 
-    __ br_zero(Assembler::zero, true, Assembler::pn, tmp1_reg, Ldone);
+    __ br_on_reg_cond(Assembler::rc_z, true, Assembler::pn, tmp1_reg, Ldone);
     __ delayed()->add(G0, 1, result_reg); // zero-length arrays are equal
 
     // load array addresses
@@ -3109,45 +3042,16 @@
     __ add(ary2_reg, base_offset, ary2_reg);
 
     // renaming registers
-    Register chr1_reg  =  tmp2_reg;   // for characters in ary1
-    Register chr2_reg  =  result_reg; // for characters in ary2
+    Register chr1_reg  =  result_reg; // for characters in ary1
+    Register chr2_reg  =  tmp2_reg;   // for characters in ary2
     Register limit_reg =  tmp1_reg;   // length
 
     // set byte count
     __ sll(limit_reg, exact_log2(sizeof(jchar)), limit_reg);
-    __ andcc(limit_reg, 0x2, chr1_reg); //trailing character ?
-    __ br(Assembler::zero, false, Assembler::pt, Lvector);
-    __ delayed()->nop();
-
-    //compare the trailing char
-    __ sub(limit_reg, sizeof(jchar), limit_reg);
-    __ lduh(ary1_reg, limit_reg, chr1_reg);
-    __ lduh(ary2_reg, limit_reg, chr2_reg);
-    __ cmp(chr1_reg, chr2_reg);
-    __ br(Assembler::notEqual, true, Assembler::pt, Ldone);
-    __ delayed()->mov(G0, result_reg);     // not equal
-
-    // only one char ?
-    __ br_zero(Assembler::zero, true, Assembler::pn, limit_reg, Ldone);
-    __ delayed()->add(G0, 1, result_reg); // zero-length arrays are equal
-
-    __ bind(Lvector);
-    // Shift ary1_reg and ary2_reg to the end of the arrays, negate limit
-    __ add(ary1_reg, limit_reg, ary1_reg);
-    __ add(ary2_reg, limit_reg, ary2_reg);
-    __ neg(limit_reg, limit_reg);
-
-    __ lduw(ary1_reg, limit_reg, chr1_reg);
-    __ bind(Lloop);
-    __ lduw(ary2_reg, limit_reg, chr2_reg);
-    __ cmp(chr1_reg, chr2_reg);
-    __ br(Assembler::notEqual, false, Assembler::pt, Ldone);
-    __ delayed()->mov(G0, result_reg);     // not equal
-    __ inccc(limit_reg, 2*sizeof(jchar));
-    // annul LDUW if branch is not taken to prevent access past end of string
-    __ br(Assembler::notZero, true, Assembler::pt, Lloop); //annul on taken
-    __ delayed()->lduw(ary1_reg, limit_reg, chr1_reg); // hoisted
-
+
+    // Compare char[] arrays aligned to 4 bytes.
+    __ char_arrays_equals(ary1_reg, ary2_reg, limit_reg, result_reg,
+                          chr1_reg, chr2_reg, Ldone);
     __ add(G0, 1, result_reg); // equals
 
     __ bind(Ldone);
@@ -9471,33 +9375,33 @@
   ins_pipe(long_memory_op);
 %}
 
-instruct string_compare(o0RegP str1, o1RegP str2, g3RegP tmp1, g4RegP tmp2, notemp_iRegI result,
-                        o7RegI tmp3, flagsReg ccr) %{
-  match(Set result (StrComp str1 str2));
-  effect(USE_KILL str1, USE_KILL str2, KILL tmp1, KILL tmp2, KILL ccr, KILL tmp3);
+instruct string_compare(o0RegP str1, o1RegP str2, g3RegI cnt1, g4RegI cnt2, notemp_iRegI result,
+                        o7RegI tmp, flagsReg ccr) %{
+  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ccr, KILL tmp);
   ins_cost(300);
-  format %{ "String Compare $str1,$str2 -> $result" %}
-  ins_encode( enc_String_Compare(str1, str2, tmp1, tmp2, result) );
+  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp" %}
+  ins_encode( enc_String_Compare(str1, str2, cnt1, cnt2, result) );
   ins_pipe(long_memory_op);
 %}
 
-instruct string_equals(o0RegP str1, o1RegP str2, g3RegP tmp1, g4RegP tmp2, notemp_iRegI result,
-                       o7RegI tmp3, flagsReg ccr) %{
-  match(Set result (StrEquals str1 str2));
-  effect(USE_KILL str1, USE_KILL str2, KILL tmp1, KILL tmp2, KILL ccr, KILL tmp3);
+instruct string_equals(o0RegP str1, o1RegP str2, g3RegI cnt, notemp_iRegI result,
+                       o7RegI tmp, flagsReg ccr) %{
+  match(Set result (StrEquals (Binary str1 str2) cnt));
+  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp, KILL ccr);
   ins_cost(300);
-  format %{ "String Equals $str1,$str2 -> $result" %}
-  ins_encode( enc_String_Equals(str1, str2, tmp1, tmp2, result) );
+  format %{ "String Equals $str1,$str2,$cnt -> $result   // KILL $tmp" %}
+  ins_encode( enc_String_Equals(str1, str2, cnt, result) );
   ins_pipe(long_memory_op);
 %}
 
-instruct array_equals(o0RegP ary1, o1RegP ary2, g3RegP tmp1, g4RegP tmp2, notemp_iRegI result,
-                        flagsReg ccr) %{
+instruct array_equals(o0RegP ary1, o1RegP ary2, g3RegI tmp1, notemp_iRegI result,
+                      o7RegI tmp2, flagsReg ccr) %{
   match(Set result (AryEq ary1 ary2));
   effect(USE_KILL ary1, USE_KILL ary2, KILL tmp1, KILL tmp2, KILL ccr);
   ins_cost(300);
-  format %{ "Array Equals $ary1,$ary2 -> $result" %}
-  ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, result));
+  format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1,$tmp2" %}
+  ins_encode( enc_Array_Equals(ary1, ary2, tmp1, result));
   ins_pipe(long_memory_op);
 %}
 
--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp	Mon Sep 14 09:49:54 2009 -0700
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp	Mon Sep 14 12:14:20 2009 -0700
@@ -8404,6 +8404,319 @@
 }
 #endif // _LP64
 
+// IndexOf substring.
+void MacroAssembler::string_indexof(Register str1, Register str2,
+                                    Register cnt1, Register cnt2, Register result,
+                                    XMMRegister vec, Register tmp) {
+  assert(UseSSE42Intrinsics, "SSE4.2 is required");
+
+  Label RELOAD_SUBSTR, PREP_FOR_SCAN, SCAN_TO_SUBSTR,
+        SCAN_SUBSTR, RET_NOT_FOUND, CLEANUP;
+
+  push(str1); // string addr
+  push(str2); // substr addr
+  push(cnt2); // substr count
+  jmpb(PREP_FOR_SCAN);
+
+  // Substr count saved at sp
+  // Substr saved at sp+1*wordSize
+  // String saved at sp+2*wordSize
+
+  // Reload substr for rescan
+  bind(RELOAD_SUBSTR);
+  movl(cnt2, Address(rsp, 0));
+  movptr(str2, Address(rsp, wordSize));
+  // We came here after the beginninig of the substring was
+  // matched but the rest of it was not so we need to search
+  // again. Start from the next element after the previous match.
+  subptr(str1, result); // Restore counter
+  shrl(str1, 1);
+  addl(cnt1, str1);
+  lea(str1, Address(result, 2)); // Reload string
+
+  // Load substr
+  bind(PREP_FOR_SCAN);
+  movdqu(vec, Address(str2, 0));
+  addl(cnt1, 8);  // prime the loop
+  subptr(str1, 16);
+
+  // Scan string for substr in 16-byte vectors
+  bind(SCAN_TO_SUBSTR);
+  subl(cnt1, 8);
+  addptr(str1, 16);
+
+  // pcmpestri
+  //   inputs:
+  //     xmm - substring
+  //     rax - substring length (elements count)
+  //     mem - scaned string
+  //     rdx - string length (elements count)
+  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
+  //   outputs:
+  //     rcx - matched index in string
+  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
+
+  pcmpestri(vec, Address(str1, 0), 0x0d);
+  jcc(Assembler::above, SCAN_TO_SUBSTR);      // CF == 0 && ZF == 0
+  jccb(Assembler::aboveEqual, RET_NOT_FOUND); // CF == 0
+
+  // Fallthrough: found a potential substr
+
+  // Make sure string is still long enough
+  subl(cnt1, tmp);
+  cmpl(cnt1, cnt2);
+  jccb(Assembler::negative, RET_NOT_FOUND);
+  // Compute start addr of substr
+  lea(str1, Address(str1, tmp, Address::times_2));
+  movptr(result, str1); // save
+
+  // Compare potential substr
+  addl(cnt1, 8);     // prime the loop
+  addl(cnt2, 8);
+  subptr(str1, 16);
+  subptr(str2, 16);
+
+  // Scan 16-byte vectors of string and substr
+  bind(SCAN_SUBSTR);
+  subl(cnt1, 8);
+  subl(cnt2, 8);
+  addptr(str1, 16);
+  addptr(str2, 16);
+  movdqu(vec, Address(str2, 0));
+  pcmpestri(vec, Address(str1, 0), 0x0d);
+  jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
+  jcc(Assembler::positive, SCAN_SUBSTR);     // SF == 0
+
+  // Compute substr offset
+  subptr(result, Address(rsp, 2*wordSize));
+  shrl(result, 1); // index
+  jmpb(CLEANUP);
+
+  bind(RET_NOT_FOUND);
+  movl(result, -1);
+
+  bind(CLEANUP);
+  addptr(rsp, 3*wordSize);
+}
+
+// Compare strings.
+void MacroAssembler::string_compare(Register str1, Register str2,
+                                    Register cnt1, Register cnt2, Register result,
+                                    XMMRegister vec1, XMMRegister vec2) {
+  Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
+
+  // Compute the minimum of the string lengths and the
+  // difference of the string lengths (stack).
+  // Do the conditional move stuff
+  movl(result, cnt1);
+  subl(cnt1, cnt2);
+  push(cnt1);
+  if (VM_Version::supports_cmov()) {
+    cmovl(Assembler::lessEqual, cnt2, result);
+  } else {
+    Label GT_LABEL;
+    jccb(Assembler::greater, GT_LABEL);
+    movl(cnt2, result);
+    bind(GT_LABEL);
+  }
+
+  // Is the minimum length zero?
+  testl(cnt2, cnt2);
+  jcc(Assembler::zero, LENGTH_DIFF_LABEL);
+
+  // Load first characters
+  load_unsigned_short(result, Address(str1, 0));
+  load_unsigned_short(cnt1, Address(str2, 0));
+
+  // Compare first characters
+  subl(result, cnt1);
+  jcc(Assembler::notZero,  POP_LABEL);
+  decrementl(cnt2);
+  jcc(Assembler::zero, LENGTH_DIFF_LABEL);
+
+  {
+    // Check after comparing first character to see if strings are equivalent
+    Label LSkip2;
+    // Check if the strings start at same location
+    cmpptr(str1, str2);
+    jccb(Assembler::notEqual, LSkip2);
+
+    // Check if the length difference is zero (from stack)
+    cmpl(Address(rsp, 0), 0x0);
+    jcc(Assembler::equal,  LENGTH_DIFF_LABEL);
+
+    // Strings might not be equivalent
+    bind(LSkip2);
+  }
+
+  // Advance to next character
+  addptr(str1, 2);
+  addptr(str2, 2);
+
+  if (UseSSE42Intrinsics) {
+    // With SSE4.2, use double quad vector compare
+    Label COMPARE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
+    // Setup to compare 16-byte vectors
+    movl(cnt1, cnt2);
+    andl(cnt2, 0xfffffff8); // cnt2 holds the vector count
+    andl(cnt1, 0x00000007); // cnt1 holds the tail count
+    testl(cnt2, cnt2);
+    jccb(Assembler::zero, COMPARE_TAIL);
+
+    lea(str2, Address(str2, cnt2, Address::times_2));
+    lea(str1, Address(str1, cnt2, Address::times_2));
+    negptr(cnt2);
+
+    bind(COMPARE_VECTORS);
+    movdqu(vec1, Address(str1, cnt2, Address::times_2));
+    movdqu(vec2, Address(str2, cnt2, Address::times_2));
+    pxor(vec1, vec2);
+    ptest(vec1, vec1);
+    jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
+    addptr(cnt2, 8);
+    jcc(Assembler::notZero, COMPARE_VECTORS);
+    jmpb(COMPARE_TAIL);
+
+    // Mismatched characters in the vectors
+    bind(VECTOR_NOT_EQUAL);
+    lea(str1, Address(str1, cnt2, Address::times_2));
+    lea(str2, Address(str2, cnt2, Address::times_2));
+    movl(cnt1, 8);
+
+    // Compare tail (< 8 chars), or rescan last vectors to
+    // find 1st mismatched characters
+    bind(COMPARE_TAIL);
+    testl(cnt1, cnt1);
+    jccb(Assembler::zero, LENGTH_DIFF_LABEL);
+    movl(cnt2, cnt1);
+    // Fallthru to tail compare
+  }
+
+  // Shift str2 and str1 to the end of the arrays, negate min
+  lea(str1, Address(str1, cnt2, Address::times_2, 0));
+  lea(str2, Address(str2, cnt2, Address::times_2, 0));
+  negptr(cnt2);
+
+    // Compare the rest of the characters
+  bind(WHILE_HEAD_LABEL);
+  load_unsigned_short(result, Address(str1, cnt2, Address::times_2, 0));
+  load_unsigned_short(cnt1, Address(str2, cnt2, Address::times_2, 0));
+  subl(result, cnt1);
+  jccb(Assembler::notZero, POP_LABEL);
+  increment(cnt2);
+  jcc(Assembler::notZero, WHILE_HEAD_LABEL);
+
+  // Strings are equal up to min length.  Return the length difference.
+  bind(LENGTH_DIFF_LABEL);
+  pop(result);
+  jmpb(DONE_LABEL);
+
+  // Discard the stored length difference
+  bind(POP_LABEL);
+  addptr(rsp, wordSize);
+
+  // That's it
+  bind(DONE_LABEL);
+}
+
+// Compare char[] arrays aligned to 4 bytes or substrings.
+void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
+                                        Register limit, Register result, Register chr,
+                                        XMMRegister vec1, XMMRegister vec2) {
+  Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;
+
+  int length_offset  = arrayOopDesc::length_offset_in_bytes();
+  int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);
+
+  // Check the input args
+  cmpptr(ary1, ary2);
+  jcc(Assembler::equal, TRUE_LABEL);
+
+  if (is_array_equ) {
+    // Need additional checks for arrays_equals.
+    andptr(ary1, ary2);
+    jcc(Assembler::zero, FALSE_LABEL); // One pointer is NULL
+
+    // Check the lengths
+    movl(limit, Address(ary1, length_offset));
+    cmpl(limit, Address(ary2, length_offset));
+    jcc(Assembler::notEqual, FALSE_LABEL);
+  }
+
+  // count == 0
+  testl(limit, limit);
+  jcc(Assembler::zero, TRUE_LABEL);
+
+  if (is_array_equ) {
+    // Load array address
+    lea(ary1, Address(ary1, base_offset));
+    lea(ary2, Address(ary2, base_offset));
+  }
+
+  shll(limit, 1);      // byte count != 0
+  movl(result, limit); // copy
+
+  if (UseSSE42Intrinsics) {
+    // With SSE4.2, use double quad vector compare
+    Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
+    // Compare 16-byte vectors
+    andl(result, 0x0000000e);  //   tail count (in bytes)
+    andl(limit, 0xfffffff0);   // vector count (in bytes)
+    jccb(Assembler::zero, COMPARE_TAIL);
+
+    lea(ary1, Address(ary1, limit, Address::times_1));
+    lea(ary2, Address(ary2, limit, Address::times_1));
+    negptr(limit);
+
+    bind(COMPARE_WIDE_VECTORS);
+    movdqu(vec1, Address(ary1, limit, Address::times_1));
+    movdqu(vec2, Address(ary2, limit, Address::times_1));
+    pxor(vec1, vec2);
+    ptest(vec1, vec1);
+    jccb(Assembler::notZero, FALSE_LABEL);
+    addptr(limit, 16);
+    jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
+
+    bind(COMPARE_TAIL); // limit is zero
+    movl(limit, result);
+    // Fallthru to tail compare
+  }
+
+  // Compare 4-byte vectors
+  andl(limit, 0xfffffffc); // vector count (in bytes)
+  jccb(Assembler::zero, COMPARE_CHAR);
+
+  lea(ary1, Address(ary1, limit, Address::times_1));
+  lea(ary2, Address(ary2, limit, Address::times_1));
+  negptr(limit);
+
+  bind(COMPARE_VECTORS);
+  movl(chr, Address(ary1, limit, Address::times_1));
+  cmpl(chr, Address(ary2, limit, Address::times_1));
+  jccb(Assembler::notEqual, FALSE_LABEL);
+  addptr(limit, 4);
+  jcc(Assembler::notZero, COMPARE_VECTORS);
+
+  // Compare trailing char (final 2 bytes), if any
+  bind(COMPARE_CHAR);
+  testl(result, 0x2);   // tail  char
+  jccb(Assembler::zero, TRUE_LABEL);
+  load_unsigned_short(chr, Address(ary1, 0));
+  load_unsigned_short(limit, Address(ary2, 0));
+  cmpl(chr, limit);
+  jccb(Assembler::notEqual, FALSE_LABEL);
+
+  bind(TRUE_LABEL);
+  movl(result, 1);   // return true
+  jmpb(DONE);
+
+  bind(FALSE_LABEL);
+  xorl(result, result); // return false
+
+  // That's it
+  bind(DONE);
+}
+
 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
   switch (cond) {
     // Note some conditions are synonyms for others
--- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp	Mon Sep 14 09:49:54 2009 -0700
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp	Mon Sep 14 12:14:20 2009 -0700
@@ -2206,6 +2206,20 @@
   void movl2ptr(Register dst, Address src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(movl(dst, src)); }
   void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); }
 
+  // IndexOf strings.
+  void string_indexof(Register str1, Register str2,
+                      Register cnt1, Register cnt2, Register result,
+                      XMMRegister vec, Register tmp);
+
+  // Compare strings.
+  void string_compare(Register str1, Register str2,
+                      Register cnt1, Register cnt2, Register result,
+                      XMMRegister vec1, XMMRegister vec2);
+
+  // Compare char[] arrays.
+  void char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
+                          Register limit, Register result, Register chr,
+                          XMMRegister vec1, XMMRegister vec2);
 
 #undef VIRTUAL
 
--- a/hotspot/src/cpu/x86/vm/x86_32.ad	Mon Sep 14 09:49:54 2009 -0700
+++ b/hotspot/src/cpu/x86/vm/x86_32.ad	Mon Sep 14 12:14:20 2009 -0700
@@ -3701,458 +3701,6 @@
     }
   %}
 
-  enc_class enc_String_Compare(eDIRegP str1, eSIRegP str2, regXD tmp1, regXD tmp2,
-                        eAXRegI tmp3, eBXRegI tmp4, eCXRegI result) %{
-    Label ECX_GOOD_LABEL, LENGTH_DIFF_LABEL,
-          POP_LABEL, DONE_LABEL, CONT_LABEL,
-          WHILE_HEAD_LABEL;
-    MacroAssembler masm(&cbuf);
-
-    XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
-    XMMRegister tmp2Reg   = as_XMMRegister($tmp2$$reg);
-
-    // Get the first character position in both strings
-    //         [8] char array, [12] offset, [16] count
-    int value_offset  = java_lang_String::value_offset_in_bytes();
-    int offset_offset = java_lang_String::offset_offset_in_bytes();
-    int count_offset  = java_lang_String::count_offset_in_bytes();
-    int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);
-
-    masm.movptr(rax, Address(rsi, value_offset));
-    masm.movl(rcx, Address(rsi, offset_offset));
-    masm.lea(rax, Address(rax, rcx, Address::times_2, base_offset));
-    masm.movptr(rbx, Address(rdi, value_offset));
-    masm.movl(rcx, Address(rdi, offset_offset));
-    masm.lea(rbx, Address(rbx, rcx, Address::times_2, base_offset));
-
-    // Compute the minimum of the string lengths(rsi) and the
-    // difference of the string lengths (stack)
-
-    if (VM_Version::supports_cmov()) {
-      masm.movl(rdi, Address(rdi, count_offset));
-      masm.movl(rsi, Address(rsi, count_offset));
-      masm.movl(rcx, rdi);
-      masm.subl(rdi, rsi);
-      masm.push(rdi);
-      masm.cmovl(Assembler::lessEqual, rsi, rcx);
-    } else {
-      masm.movl(rdi, Address(rdi, count_offset));
-      masm.movl(rcx, Address(rsi, count_offset));
-      masm.movl(rsi, rdi);
-      masm.subl(rdi, rcx);
-      masm.push(rdi);
-      masm.jccb(Assembler::lessEqual, ECX_GOOD_LABEL);
-      masm.movl(rsi, rcx);
-      // rsi holds min, rcx is unused
-    }
-
-    // Is the minimum length zero?
-    masm.bind(ECX_GOOD_LABEL);
-    masm.testl(rsi, rsi);
-    masm.jcc(Assembler::zero, LENGTH_DIFF_LABEL);
-
-    // Load first characters
-    masm.load_unsigned_short(rcx, Address(rbx, 0));
-    masm.load_unsigned_short(rdi, Address(rax, 0));
-
-    // Compare first characters
-    masm.subl(rcx, rdi);
-    masm.jcc(Assembler::notZero,  POP_LABEL);
-    masm.decrementl(rsi);
-    masm.jcc(Assembler::zero, LENGTH_DIFF_LABEL);
-
-    {
-      // Check after comparing first character to see if strings are equivalent
-      Label LSkip2;
-      // Check if the strings start at same location
-      masm.cmpptr(rbx,rax);
-      masm.jccb(Assembler::notEqual, LSkip2);
-
-      // Check if the length difference is zero (from stack)
-      masm.cmpl(Address(rsp, 0), 0x0);
-      masm.jcc(Assembler::equal,  LENGTH_DIFF_LABEL);
-
-      // Strings might not be equivalent
-      masm.bind(LSkip2);
-    }
-
-   // Advance to next character
-    masm.addptr(rax, 2);
-    masm.addptr(rbx, 2);
-
-    if (UseSSE42Intrinsics) {
-      // With SSE4.2, use double quad vector compare
-      Label COMPARE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
-      // Setup to compare 16-byte vectors
-      masm.movl(rdi, rsi);
-      masm.andl(rsi, 0xfffffff8); // rsi holds the vector count
-      masm.andl(rdi, 0x00000007); // rdi holds the tail count
-      masm.testl(rsi, rsi);
-      masm.jccb(Assembler::zero, COMPARE_TAIL);
-
-      masm.lea(rax, Address(rax, rsi, Address::times_2));
-      masm.lea(rbx, Address(rbx, rsi, Address::times_2));
-      masm.negl(rsi);
-
-      masm.bind(COMPARE_VECTORS);
-      masm.movdqu(tmp1Reg, Address(rax, rsi, Address::times_2));
-      masm.movdqu(tmp2Reg, Address(rbx, rsi, Address::times_2));
-      masm.pxor(tmp1Reg, tmp2Reg);
-      masm.ptest(tmp1Reg, tmp1Reg);
-      masm.jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
-      masm.addl(rsi, 8);
-      masm.jcc(Assembler::notZero, COMPARE_VECTORS);
-      masm.jmpb(COMPARE_TAIL);
-
-      // Mismatched characters in the vectors
-      masm.bind(VECTOR_NOT_EQUAL);
-      masm.lea(rax, Address(rax, rsi, Address::times_2));
-      masm.lea(rbx, Address(rbx, rsi, Address::times_2));
-      masm.movl(rdi, 8);
-
-      // Compare tail (< 8 chars), or rescan last vectors to
-      // find 1st mismatched characters
-      masm.bind(COMPARE_TAIL);
-      masm.testl(rdi, rdi);
-      masm.jccb(Assembler::zero, LENGTH_DIFF_LABEL);
-      masm.movl(rsi, rdi);
-      // Fallthru to tail compare
-    }
-
-    //Shift rax, and rbx, to the end of the arrays, negate min
-    masm.lea(rax, Address(rax, rsi, Address::times_2, 0));
-    masm.lea(rbx, Address(rbx, rsi, Address::times_2, 0));
-    masm.negl(rsi);
-
-    // Compare the rest of the characters
-    masm.bind(WHILE_HEAD_LABEL);
-    masm.load_unsigned_short(rcx, Address(rbx, rsi, Address::times_2, 0));
-    masm.load_unsigned_short(rdi, Address(rax, rsi, Address::times_2, 0));
-    masm.subl(rcx, rdi);
-    masm.jccb(Assembler::notZero, POP_LABEL);
-    masm.incrementl(rsi);
-    masm.jcc(Assembler::notZero, WHILE_HEAD_LABEL);
-
-    // Strings are equal up to min length.  Return the length difference.
-    masm.bind(LENGTH_DIFF_LABEL);
-    masm.pop(rcx);
-    masm.jmpb(DONE_LABEL);
-
-    // Discard the stored length difference
-    masm.bind(POP_LABEL);
-    masm.addptr(rsp, 4);
-
-    // That's it
-    masm.bind(DONE_LABEL);
-  %}
-
- enc_class enc_String_Equals(eDIRegP str1, eSIRegP str2, regXD tmp1, regXD tmp2,
-                       eBXRegI tmp3, eCXRegI tmp4, eAXRegI result) %{
-    Label RET_TRUE, RET_FALSE, DONE, COMPARE_VECTORS, COMPARE_CHAR;
-    MacroAssembler masm(&cbuf);
-
-    XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
-    XMMRegister tmp2Reg   = as_XMMRegister($tmp2$$reg);
-
-    int value_offset  = java_lang_String::value_offset_in_bytes();
-    int offset_offset = java_lang_String::offset_offset_in_bytes();
-    int count_offset  = java_lang_String::count_offset_in_bytes();
-    int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);
-
-    // does source == target string?
-    masm.cmpptr(rdi, rsi);
-    masm.jcc(Assembler::equal, RET_TRUE);
-
-    // get and compare counts
-    masm.movl(rcx, Address(rdi, count_offset));
-    masm.movl(rax, Address(rsi, count_offset));
-    masm.cmpl(rcx, rax);
-    masm.jcc(Assembler::notEqual, RET_FALSE);
-    masm.testl(rax, rax);
-    masm.jcc(Assembler::zero, RET_TRUE);
-
-    // get source string offset and value
-    masm.movptr(rbx, Address(rsi, value_offset));
-    masm.movl(rax, Address(rsi, offset_offset));
-    masm.leal(rsi, Address(rbx, rax, Address::times_2, base_offset));
-
-    // get compare string offset and value
-    masm.movptr(rbx, Address(rdi, value_offset));
-    masm.movl(rax, Address(rdi, offset_offset));
-    masm.leal(rdi, Address(rbx, rax, Address::times_2, base_offset));
-
-    // Set byte count
-    masm.shll(rcx, 1);
-    masm.movl(rax, rcx);
-
-    if (UseSSE42Intrinsics) {
-      // With SSE4.2, use double quad vector compare
-      Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
-      // Compare 16-byte vectors
-      masm.andl(rcx, 0xfffffff0);  // vector count (in bytes)
-      masm.andl(rax, 0x0000000e);  // tail count (in bytes)
-      masm.testl(rcx, rcx);
-      masm.jccb(Assembler::zero, COMPARE_TAIL);
-      masm.lea(rdi, Address(rdi, rcx, Address::times_1));
-      masm.lea(rsi, Address(rsi, rcx, Address::times_1));
-      masm.negl(rcx);
-
-      masm.bind(COMPARE_WIDE_VECTORS);
-      masm.movdqu(tmp1Reg, Address(rdi, rcx, Address::times_1));
-      masm.movdqu(tmp2Reg, Address(rsi, rcx, Address::times_1));
-      masm.pxor(tmp1Reg, tmp2Reg);
-      masm.ptest(tmp1Reg, tmp1Reg);
-      masm.jccb(Assembler::notZero, RET_FALSE);
-      masm.addl(rcx, 16);
-      masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
-      masm.bind(COMPARE_TAIL);
-      masm.movl(rcx, rax);
-      // Fallthru to tail compare
-    }
-
-    // Compare 4-byte vectors
-    masm.andl(rcx, 0xfffffffc);  // vector count (in bytes)
-    masm.andl(rax, 0x00000002);  // tail char (in bytes)
-    masm.testl(rcx, rcx);
-    masm.jccb(Assembler::zero, COMPARE_CHAR);
-    masm.lea(rdi, Address(rdi, rcx, Address::times_1));
-    masm.lea(rsi, Address(rsi, rcx, Address::times_1));
-    masm.negl(rcx);
-
-    masm.bind(COMPARE_VECTORS);
-    masm.movl(rbx, Address(rdi, rcx, Address::times_1));
-    masm.cmpl(rbx, Address(rsi, rcx, Address::times_1));
-    masm.jccb(Assembler::notEqual, RET_FALSE);
-    masm.addl(rcx, 4);
-    masm.jcc(Assembler::notZero, COMPARE_VECTORS);
-
-    // Compare trailing char (final 2 bytes), if any
-    masm.bind(COMPARE_CHAR);
-    masm.testl(rax, rax);
-    masm.jccb(Assembler::zero, RET_TRUE);
-    masm.load_unsigned_short(rbx, Address(rdi, 0));
-    masm.load_unsigned_short(rcx, Address(rsi, 0));
-    masm.cmpl(rbx, rcx);
-    masm.jccb(Assembler::notEqual, RET_FALSE);
-
-    masm.bind(RET_TRUE);
-    masm.movl(rax, 1);   // return true
-    masm.jmpb(DONE);
-
-    masm.bind(RET_FALSE);
-    masm.xorl(rax, rax); // return false
-
-    masm.bind(DONE);
-    %}
-
- enc_class enc_String_IndexOf(eSIRegP str1, eDIRegP str2, regXD tmp1, eAXRegI tmp2,
-                        eCXRegI tmp3, eDXRegI tmp4, eBXRegI result) %{
-    // SSE4.2 version
-    Label LOAD_SUBSTR, PREP_FOR_SCAN, SCAN_TO_SUBSTR,
-          SCAN_SUBSTR, RET_NEG_ONE, RET_NOT_FOUND, CLEANUP, DONE;
-    MacroAssembler masm(&cbuf);
-
-    XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
-
-    // Get the first character position in both strings
-    //         [8] char array, [12] offset, [16] count
-    int value_offset  = java_lang_String::value_offset_in_bytes();
-    int offset_offset = java_lang_String::offset_offset_in_bytes();
-    int count_offset  = java_lang_String::count_offset_in_bytes();
-    int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);
-
-    // Get counts for string and substr
-    masm.movl(rdx, Address(rsi, count_offset));
-    masm.movl(rax, Address(rdi, count_offset));
-    // Check for substr count > string count
-    masm.cmpl(rax, rdx);
-    masm.jcc(Assembler::greater, RET_NEG_ONE);
-
-    // Start the indexOf operation
-    // Get start addr of string
-    masm.movptr(rbx, Address(rsi, value_offset));
-    masm.movl(rcx, Address(rsi, offset_offset));
-    masm.lea(rsi, Address(rbx, rcx, Address::times_2, base_offset));
-    masm.push(rsi);
-
-    // Get start addr of substr
-    masm.movptr(rbx, Address(rdi, value_offset));
-    masm.movl(rcx, Address(rdi, offset_offset));
-    masm.lea(rdi, Address(rbx, rcx, Address::times_2, base_offset));
-    masm.push(rdi);
-    masm.push(rax);
-    masm.jmpb(PREP_FOR_SCAN);
-
-    // Substr count saved at sp
-    // Substr saved at sp+4
-    // String saved at sp+8
-
-    // Prep to load substr for scan
-    masm.bind(LOAD_SUBSTR);
-    masm.movptr(rdi, Address(rsp, 4));
-    masm.movl(rax, Address(rsp, 0));
-
-    // Load substr
-    masm.bind(PREP_FOR_SCAN);
-    masm.movdqu(tmp1Reg, Address(rdi, 0));
-    masm.addl(rdx, 8);        // prime the loop
-    masm.subptr(rsi, 16);
-
-    // Scan string for substr in 16-byte vectors
-    masm.bind(SCAN_TO_SUBSTR);
-    masm.subl(rdx, 8);
-    masm.addptr(rsi, 16);
-    masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d);
-    masm.jcc(Assembler::above, SCAN_TO_SUBSTR);     // CF == 0 && ZF == 0
-    masm.jccb(Assembler::aboveEqual, RET_NOT_FOUND); // CF == 0
-
-    // Fallthru: found a potential substr
-
-    // Make sure string is still long enough
-    masm.subl(rdx, rcx);
-    masm.cmpl(rdx, rax);
-    masm.jccb(Assembler::negative, RET_NOT_FOUND);
-    // Compute start addr of substr
-    masm.lea(rsi, Address(rsi, rcx, Address::times_2));
-    masm.movptr(rbx, rsi);
-
-    // Compare potential substr
-    masm.addl(rdx, 8);        // prime the loop
-    masm.addl(rax, 8);
-    masm.subptr(rsi, 16);
-    masm.subptr(rdi, 16);
-
-    // Scan 16-byte vectors of string and substr
-    masm.bind(SCAN_SUBSTR);
-    masm.subl(rax, 8);
-    masm.subl(rdx, 8);
-    masm.addptr(rsi, 16);
-    masm.addptr(rdi, 16);
-    masm.movdqu(tmp1Reg, Address(rdi, 0));
-    masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d);
-    masm.jcc(Assembler::noOverflow, LOAD_SUBSTR);   // OF == 0
-    masm.jcc(Assembler::positive, SCAN_SUBSTR);     // SF == 0
-
-    // Compute substr offset
-    masm.movptr(rsi, Address(rsp, 8));
-    masm.subptr(rbx, rsi);
-    masm.shrl(rbx, 1);
-    masm.jmpb(CLEANUP);
-
-    masm.bind(RET_NEG_ONE);
-    masm.movl(rbx, -1);
-    masm.jmpb(DONE);
-
-    masm.bind(RET_NOT_FOUND);
-    masm.movl(rbx, -1);
-
-    masm.bind(CLEANUP);
-    masm.addptr(rsp, 12);
-
-    masm.bind(DONE);
-  %}
-
-  enc_class enc_Array_Equals(eDIRegP ary1, eSIRegP ary2, regXD tmp1, regXD tmp2,
-                             eBXRegI tmp3, eDXRegI tmp4, eAXRegI result) %{
-    Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;
-    MacroAssembler masm(&cbuf);
-
-    XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
-    XMMRegister tmp2Reg   = as_XMMRegister($tmp2$$reg);
-    Register ary1Reg      = as_Register($ary1$$reg);
-    Register ary2Reg      = as_Register($ary2$$reg);
-    Register tmp3Reg      = as_Register($tmp3$$reg);
-    Register tmp4Reg      = as_Register($tmp4$$reg);
-    Register resultReg    = as_Register($result$$reg);
-
-    int length_offset  = arrayOopDesc::length_offset_in_bytes();
-    int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);
-
-    // Check the input args
-    masm.cmpptr(ary1Reg, ary2Reg);
-    masm.jcc(Assembler::equal, TRUE_LABEL);
-    masm.testptr(ary1Reg, ary1Reg);
-    masm.jcc(Assembler::zero, FALSE_LABEL);
-    masm.testptr(ary2Reg, ary2Reg);
-    masm.jcc(Assembler::zero, FALSE_LABEL);
-
-    // Check the lengths
-    masm.movl(tmp4Reg, Address(ary1Reg, length_offset));
-    masm.movl(resultReg, Address(ary2Reg, length_offset));
-    masm.cmpl(tmp4Reg, resultReg);
-    masm.jcc(Assembler::notEqual, FALSE_LABEL);
-    masm.testl(resultReg, resultReg);
-    masm.jcc(Assembler::zero, TRUE_LABEL);
-
-    // Load array addrs
-    masm.lea(ary1Reg, Address(ary1Reg, base_offset));
-    masm.lea(ary2Reg, Address(ary2Reg, base_offset));
-
-    // Set byte count
-    masm.shll(tmp4Reg, 1);
-    masm.movl(resultReg, tmp4Reg);
-
-    if (UseSSE42Intrinsics) {
-      // With SSE4.2, use double quad vector compare
-      Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
-      // Compare 16-byte vectors
-      masm.andl(tmp4Reg, 0xfffffff0);    // vector count (in bytes)
-      masm.andl(resultReg, 0x0000000e);  // tail count (in bytes)
-      masm.testl(tmp4Reg, tmp4Reg);
-      masm.jccb(Assembler::zero, COMPARE_TAIL);
-      masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
-      masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
-      masm.negl(tmp4Reg);
-
-      masm.bind(COMPARE_WIDE_VECTORS);
-      masm.movdqu(tmp1Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
-      masm.movdqu(tmp2Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
-      masm.pxor(tmp1Reg, tmp2Reg);
-      masm.ptest(tmp1Reg, tmp1Reg);
-
-      masm.jccb(Assembler::notZero, FALSE_LABEL);
-      masm.addl(tmp4Reg, 16);
-      masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
-      masm.bind(COMPARE_TAIL);
-      masm.movl(tmp4Reg, resultReg);
-      // Fallthru to tail compare
-    }
-
-    // Compare 4-byte vectors
-    masm.andl(tmp4Reg, 0xfffffffc);    // vector count (in bytes)
-    masm.andl(resultReg, 0x00000002);  // tail char (in bytes)
-    masm.testl(tmp4Reg, tmp4Reg);
-    masm.jccb(Assembler::zero, COMPARE_CHAR);
-    masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
-    masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
-    masm.negl(tmp4Reg);
-
-    masm.bind(COMPARE_VECTORS);
-    masm.movl(tmp3Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
-    masm.cmpl(tmp3Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
-    masm.jccb(Assembler::notEqual, FALSE_LABEL);
-    masm.addl(tmp4Reg, 4);
-    masm.jcc(Assembler::notZero, COMPARE_VECTORS);
-
-    // Compare trailing char (final 2 bytes), if any
-    masm.bind(COMPARE_CHAR);
-    masm.testl(resultReg, resultReg);
-    masm.jccb(Assembler::zero, TRUE_LABEL);
-    masm.load_unsigned_short(tmp3Reg, Address(ary1Reg, 0));
-    masm.load_unsigned_short(tmp4Reg, Address(ary2Reg, 0));
-    masm.cmpl(tmp3Reg, tmp4Reg);
-    masm.jccb(Assembler::notEqual, FALSE_LABEL);
-
-    masm.bind(TRUE_LABEL);
-    masm.movl(resultReg, 1);   // return true
-    masm.jmpb(DONE);
-
-    masm.bind(FALSE_LABEL);
-    masm.xorl(resultReg, resultReg); // return false
-
-    // That's it
-    masm.bind(DONE);
-  %}
 
   enc_class enc_pop_rdx() %{
     emit_opcode(cbuf,0x5A);
@@ -12718,48 +12266,64 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct string_compare(eDIRegP str1, eSIRegP str2, regXD tmp1, regXD tmp2,
-                        eAXRegI tmp3, eBXRegI tmp4, eCXRegI result, eFlagsReg cr) %{
-  match(Set result (StrComp str1 str2));
-  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, KILL tmp3, KILL tmp4, KILL cr);
-  //ins_cost(300);
-
-  format %{ "String Compare $str1,$str2 -> $result    // KILL EAX, EBX" %}
-  ins_encode( enc_String_Compare(str1, str2, tmp1, tmp2, tmp3, tmp4, result) );
+instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eBXRegI cnt2,
+                        eAXRegI result, regXD tmp1, regXD tmp2, eFlagsReg cr) %{
+  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
+
+  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1, $tmp2" %}
+  ins_encode %{
+    __ string_compare($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
+                      $tmp1$$XMMRegister, $tmp2$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // fast string equals
-instruct string_equals(eDIRegP str1, eSIRegP str2, regXD tmp1, regXD tmp2,
-                       eBXRegI tmp3, eCXRegI tmp4, eAXRegI result, eFlagsReg cr) %{
-  match(Set result (StrEquals str1 str2));
-  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, KILL tmp3, KILL tmp4, KILL cr);
-
-  format %{ "String Equals $str1,$str2 -> $result    // KILL EBX, ECX" %}
-  ins_encode( enc_String_Equals(tmp1, tmp2, str1, str2, tmp3, tmp4, result) );
-  ins_pipe( pipe_slow );
-%}
-
-instruct string_indexof(eSIRegP str1, eDIRegP str2, regXD tmp1, eAXRegI tmp2,
-                        eCXRegI tmp3, eDXRegI tmp4, eBXRegI result, eFlagsReg cr) %{
+instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
+                       regXD tmp1, regXD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
+  match(Set result (StrEquals (Binary str1 str2) cnt));
+  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
+
+  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
+  ins_encode %{
+    __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
+                          $cnt$$Register, $result$$Register, $tmp3$$Register,
+                          $tmp1$$XMMRegister, $tmp2$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
+                        eBXRegI result, regXD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
   predicate(UseSSE42Intrinsics);
-  match(Set result (StrIndexOf str1 str2));
-  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, KILL tmp2, KILL tmp3, KILL tmp4, KILL cr);
-
-  format %{ "String IndexOf $str1,$str2 -> $result    // KILL EAX, ECX, EDX" %}
-  ins_encode( enc_String_IndexOf(str1, str2, tmp1, tmp2, tmp3, tmp4, result) );
+  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp2, KILL cr);
+
+  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp2, $tmp1" %}
+  ins_encode %{
+    __ string_indexof($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
+                      $tmp1$$XMMRegister, $tmp2$$Register);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // fast array equals
-instruct array_equals(eDIRegP ary1, eSIRegP ary2, regXD tmp1, regXD tmp2, eBXRegI tmp3,
-                      eDXRegI tmp4, eAXRegI result, eFlagsReg cr) %{
+instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
+                      regXD tmp1, regXD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
+%{
   match(Set result (AryEq ary1 ary2));
   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
   //ins_cost(300);
 
-  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL EBX, EDX" %}
-  ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, tmp3, tmp4, result) );
+  format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
+  ins_encode %{
+    __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
+                          $tmp3$$Register, $result$$Register, $tmp4$$Register,
+                          $tmp1$$XMMRegister, $tmp2$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
--- a/hotspot/src/cpu/x86/vm/x86_64.ad	Mon Sep 14 09:49:54 2009 -0700
+++ b/hotspot/src/cpu/x86/vm/x86_64.ad	Mon Sep 14 12:14:20 2009 -0700
@@ -3701,448 +3701,6 @@
     }
   %}
 
-  enc_class enc_String_Compare(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2,
-                        rax_RegI tmp3, rbx_RegI tmp4, rcx_RegI result) %{
-    Label RCX_GOOD_LABEL, LENGTH_DIFF_LABEL,
-          POP_LABEL, DONE_LABEL, CONT_LABEL,
-          WHILE_HEAD_LABEL;
-    MacroAssembler masm(&cbuf);
-
-    XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
-    XMMRegister tmp2Reg   = as_XMMRegister($tmp2$$reg);
-
-    // Get the first character position in both strings
-    //         [8] char array, [12] offset, [16] count
-    int value_offset  = java_lang_String::value_offset_in_bytes();
-    int offset_offset = java_lang_String::offset_offset_in_bytes();
-    int count_offset  = java_lang_String::count_offset_in_bytes();
-    int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);
-
-    masm.load_heap_oop(rax, Address(rsi, value_offset));
-    masm.movl(rcx, Address(rsi, offset_offset));
-    masm.lea(rax, Address(rax, rcx, Address::times_2, base_offset));
-    masm.load_heap_oop(rbx, Address(rdi, value_offset));
-    masm.movl(rcx, Address(rdi, offset_offset));
-    masm.lea(rbx, Address(rbx, rcx, Address::times_2, base_offset));
-
-    // Compute the minimum of the string lengths(rsi) and the
-    // difference of the string lengths (stack)
-
-    // do the conditional move stuff
-    masm.movl(rdi, Address(rdi, count_offset));
-    masm.movl(rsi, Address(rsi, count_offset));
-    masm.movl(rcx, rdi);
-    masm.subl(rdi, rsi);
-    masm.push(rdi);
-    masm.cmov(Assembler::lessEqual, rsi, rcx);
-
-    // Is the minimum length zero?
-    masm.bind(RCX_GOOD_LABEL);
-    masm.testl(rsi, rsi);
-    masm.jcc(Assembler::zero, LENGTH_DIFF_LABEL);
-
-    // Load first characters
-    masm.load_unsigned_short(rcx, Address(rbx, 0));
-    masm.load_unsigned_short(rdi, Address(rax, 0));
-
-    // Compare first characters
-    masm.subl(rcx, rdi);
-    masm.jcc(Assembler::notZero,  POP_LABEL);
-    masm.decrementl(rsi);
-    masm.jcc(Assembler::zero, LENGTH_DIFF_LABEL);
-
-    {
-      // Check after comparing first character to see if strings are equivalent
-      Label LSkip2;
-      // Check if the strings start at same location
-      masm.cmpptr(rbx, rax);
-      masm.jccb(Assembler::notEqual, LSkip2);
-
-      // Check if the length difference is zero (from stack)
-      masm.cmpl(Address(rsp, 0), 0x0);
-      masm.jcc(Assembler::equal,  LENGTH_DIFF_LABEL);
-
-      // Strings might not be equivalent
-      masm.bind(LSkip2);
-    }
-
-    // Advance to next character
-    masm.addptr(rax, 2);
-    masm.addptr(rbx, 2);
-
-    if (UseSSE42Intrinsics) {
-      // With SSE4.2, use double quad vector compare
-      Label COMPARE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
-      // Setup to compare 16-byte vectors
-      masm.movl(rdi, rsi);
-      masm.andl(rsi, 0xfffffff8); // rsi holds the vector count
-      masm.andl(rdi, 0x00000007); // rdi holds the tail count
-      masm.testl(rsi, rsi);
-      masm.jccb(Assembler::zero, COMPARE_TAIL);
-
-      masm.lea(rax, Address(rax, rsi, Address::times_2));
-      masm.lea(rbx, Address(rbx, rsi, Address::times_2));
-      masm.negptr(rsi);
-
-      masm.bind(COMPARE_VECTORS);
-      masm.movdqu(tmp1Reg, Address(rax, rsi, Address::times_2));
-      masm.movdqu(tmp2Reg, Address(rbx, rsi, Address::times_2));
-      masm.pxor(tmp1Reg, tmp2Reg);
-      masm.ptest(tmp1Reg, tmp1Reg);
-      masm.jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
-      masm.addptr(rsi, 8);
-      masm.jcc(Assembler::notZero, COMPARE_VECTORS);
-      masm.jmpb(COMPARE_TAIL);
-
-      // Mismatched characters in the vectors
-      masm.bind(VECTOR_NOT_EQUAL);
-      masm.lea(rax, Address(rax, rsi, Address::times_2));
-      masm.lea(rbx, Address(rbx, rsi, Address::times_2));
-      masm.movl(rdi, 8);
-
-      // Compare tail (< 8 chars), or rescan last vectors to
-      // find 1st mismatched characters
-      masm.bind(COMPARE_TAIL);
-      masm.testl(rdi, rdi);
-      masm.jccb(Assembler::zero, LENGTH_DIFF_LABEL);
-      masm.movl(rsi, rdi);
-      // Fallthru to tail compare
-    }
-
-    // Shift RAX and RBX to the end of the arrays, negate min
-    masm.lea(rax, Address(rax, rsi, Address::times_2, 0));
-    masm.lea(rbx, Address(rbx, rsi, Address::times_2, 0));
-    masm.negptr(rsi);
-
-    // Compare the rest of the characters
-    masm.bind(WHILE_HEAD_LABEL);
-    masm.load_unsigned_short(rcx, Address(rbx, rsi, Address::times_2, 0));
-    masm.load_unsigned_short(rdi, Address(rax, rsi, Address::times_2, 0));
-    masm.subl(rcx, rdi);
-    masm.jccb(Assembler::notZero, POP_LABEL);
-    masm.increment(rsi);
-    masm.jcc(Assembler::notZero, WHILE_HEAD_LABEL);
-
-    // Strings are equal up to min length.  Return the length difference.
-    masm.bind(LENGTH_DIFF_LABEL);
-    masm.pop(rcx);
-    masm.jmpb(DONE_LABEL);
-
-    // Discard the stored length difference
-    masm.bind(POP_LABEL);
-    masm.addptr(rsp, 8);
-
-    // That's it
-    masm.bind(DONE_LABEL);
-  %}
-
- enc_class enc_String_IndexOf(rsi_RegP str1, rdi_RegP str2, regD tmp1, rax_RegI tmp2,
-                        rcx_RegI tmp3, rdx_RegI tmp4, rbx_RegI result) %{
-    // SSE4.2 version
-    Label LOAD_SUBSTR, PREP_FOR_SCAN, SCAN_TO_SUBSTR,
-          SCAN_SUBSTR, RET_NEG_ONE, RET_NOT_FOUND, CLEANUP, DONE;
-    MacroAssembler masm(&cbuf);
-
-    XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
-
-    // Get the first character position in both strings
-    //         [8] char array, [12] offset, [16] count
-    int value_offset  = java_lang_String::value_offset_in_bytes();
-    int offset_offset = java_lang_String::offset_offset_in_bytes();
-    int count_offset  = java_lang_String::count_offset_in_bytes();
-    int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);
-
-    // Get counts for string and substr
-    masm.movl(rdx, Address(rsi, count_offset));
-    masm.movl(rax, Address(rdi, count_offset));
-    // Check for substr count > string count
-    masm.cmpl(rax, rdx);
-    masm.jcc(Assembler::greater, RET_NEG_ONE);
-
-    // Start the indexOf operation
-    // Get start addr of string
-    masm.load_heap_oop(rbx, Address(rsi, value_offset));
-    masm.movl(rcx, Address(rsi, offset_offset));
-    masm.lea(rsi, Address(rbx, rcx, Address::times_2, base_offset));
-    masm.push(rsi);
-
-    // Get start addr of substr
-    masm.load_heap_oop(rbx, Address(rdi, value_offset));
-    masm.movl(rcx, Address(rdi, offset_offset));
-    masm.lea(rdi, Address(rbx, rcx, Address::times_2, base_offset));
-    masm.push(rdi);
-    masm.push(rax);
-    masm.jmpb(PREP_FOR_SCAN);
-
-    // Substr count saved at sp
-    // Substr saved at sp+8
-    // String saved at sp+16
-
-    // Prep to load substr for scan
-    masm.bind(LOAD_SUBSTR);
-    masm.movptr(rdi, Address(rsp, 8));
-    masm.movl(rax, Address(rsp, 0));
-
-    // Load substr
-    masm.bind(PREP_FOR_SCAN);
-    masm.movdqu(tmp1Reg, Address(rdi, 0));
-    masm.addq(rdx, 8);    // prime the loop
-    masm.subptr(rsi, 16);
-
-    // Scan string for substr in 16-byte vectors
-    masm.bind(SCAN_TO_SUBSTR);
-    masm.subq(rdx, 8);
-    masm.addptr(rsi, 16);
-    masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d);
-    masm.jcc(Assembler::above, SCAN_TO_SUBSTR);
-    masm.jccb(Assembler::aboveEqual, RET_NOT_FOUND);
-
-    // Fallthru: found a potential substr
-
-    //Make sure string is still long enough
-    masm.subl(rdx, rcx);
-    masm.cmpl(rdx, rax);
-    masm.jccb(Assembler::negative, RET_NOT_FOUND);
-    // Compute start addr of substr
-    masm.lea(rsi, Address(rsi, rcx, Address::times_2));
-    masm.movptr(rbx, rsi);
-
-    // Compare potential substr
-    masm.addq(rdx, 8);        // prime the loop
-    masm.addq(rax, 8);
-    masm.subptr(rsi, 16);
-    masm.subptr(rdi, 16);
-
-    // Scan 16-byte vectors of string and substr
-    masm.bind(SCAN_SUBSTR);
-    masm.subq(rax, 8);
-    masm.subq(rdx, 8);
-    masm.addptr(rsi, 16);
-    masm.addptr(rdi, 16);
-    masm.movdqu(tmp1Reg, Address(rdi, 0));
-    masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d);
-    masm.jcc(Assembler::noOverflow, LOAD_SUBSTR);   // OF == 0
-    masm.jcc(Assembler::positive, SCAN_SUBSTR);     // SF == 0
-
-    // Compute substr offset
-    masm.movptr(rsi, Address(rsp, 16));
-    masm.subptr(rbx, rsi);
-    masm.shrl(rbx, 1);
-    masm.jmpb(CLEANUP);
-
-    masm.bind(RET_NEG_ONE);
-    masm.movl(rbx, -1);
-    masm.jmpb(DONE);
-
-    masm.bind(RET_NOT_FOUND);
-    masm.movl(rbx, -1);
-
-    masm.bind(CLEANUP);
-    masm.addptr(rsp, 24);
-
-    masm.bind(DONE);
-  %}
-
-  enc_class enc_String_Equals(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2,
-                              rbx_RegI tmp3, rcx_RegI tmp2, rax_RegI result) %{
-    Label RET_TRUE, RET_FALSE, DONE, COMPARE_VECTORS, COMPARE_CHAR;
-    MacroAssembler masm(&cbuf);
-
-    XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
-    XMMRegister tmp2Reg   = as_XMMRegister($tmp2$$reg);
-
-    int value_offset  = java_lang_String::value_offset_in_bytes();
-    int offset_offset = java_lang_String::offset_offset_in_bytes();
-    int count_offset  = java_lang_String::count_offset_in_bytes();
-    int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);
-
-    // does source == target string?
-    masm.cmpptr(rdi, rsi);
-    masm.jcc(Assembler::equal, RET_TRUE);
-
-    // get and compare counts
-    masm.movl(rcx, Address(rdi, count_offset));
-    masm.movl(rax, Address(rsi, count_offset));
-    masm.cmpl(rcx, rax);
-    masm.jcc(Assembler::notEqual, RET_FALSE);
-    masm.testl(rax, rax);
-    masm.jcc(Assembler::zero, RET_TRUE);
-
-    // get source string offset and value
-    masm.load_heap_oop(rbx, Address(rsi, value_offset));
-    masm.movl(rax, Address(rsi, offset_offset));
-    masm.lea(rsi, Address(rbx, rax, Address::times_2, base_offset));
-
-    // get compare string offset and value
-    masm.load_heap_oop(rbx, Address(rdi, value_offset));
-    masm.movl(rax, Address(rdi, offset_offset));
-    masm.lea(rdi, Address(rbx, rax, Address::times_2, base_offset));
-
-    // Set byte count
-    masm.shll(rcx, 1);
-    masm.movl(rax, rcx);
-
-    if (UseSSE42Intrinsics) {
-      // With SSE4.2, use double quad vector compare
-      Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
-      // Compare 16-byte vectors
-      masm.andl(rcx, 0xfffffff0);  // vector count (in bytes)
-      masm.andl(rax, 0x0000000e);  // tail count (in bytes)
-      masm.testl(rcx, rcx);
-      masm.jccb(Assembler::zero, COMPARE_TAIL);
-      masm.lea(rdi, Address(rdi, rcx, Address::times_1));
-      masm.lea(rsi, Address(rsi, rcx, Address::times_1));
-      masm.negptr(rcx);
-
-      masm.bind(COMPARE_WIDE_VECTORS);
-      masm.movdqu(tmp1Reg, Address(rdi, rcx, Address::times_1));
-      masm.movdqu(tmp2Reg, Address(rsi, rcx, Address::times_1));
-      masm.pxor(tmp1Reg, tmp2Reg);
-      masm.ptest(tmp1Reg, tmp1Reg);
-      masm.jccb(Assembler::notZero, RET_FALSE);
-      masm.addptr(rcx, 16);
-      masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
-      masm.bind(COMPARE_TAIL);
-      masm.movl(rcx, rax);
-      // Fallthru to tail compare
-    }
-
-    // Compare 4-byte vectors
-    masm.andl(rcx, 0xfffffffc);  // vector count (in bytes)
-    masm.andl(rax, 0x00000002);  // tail char (in bytes)
-    masm.testl(rcx, rcx);
-    masm.jccb(Assembler::zero, COMPARE_CHAR);
-    masm.lea(rdi, Address(rdi, rcx, Address::times_1));
-    masm.lea(rsi, Address(rsi, rcx, Address::times_1));
-    masm.negptr(rcx);
-
-    masm.bind(COMPARE_VECTORS);
-    masm.movl(rbx, Address(rdi, rcx, Address::times_1));
-    masm.cmpl(rbx, Address(rsi, rcx, Address::times_1));
-    masm.jccb(Assembler::notEqual, RET_FALSE);
-    masm.addptr(rcx, 4);
-    masm.jcc(Assembler::notZero, COMPARE_VECTORS);
-
-    // Compare trailing char (final 2 bytes), if any
-    masm.bind(COMPARE_CHAR);
-    masm.testl(rax, rax);
-    masm.jccb(Assembler::zero, RET_TRUE);
-    masm.load_unsigned_short(rbx, Address(rdi, 0));
-    masm.load_unsigned_short(rcx, Address(rsi, 0));
-    masm.cmpl(rbx, rcx);
-    masm.jccb(Assembler::notEqual, RET_FALSE);
-
-    masm.bind(RET_TRUE);
-    masm.movl(rax, 1);   // return true
-    masm.jmpb(DONE);
-
-    masm.bind(RET_FALSE);
-    masm.xorl(rax, rax); // return false
-
-    masm.bind(DONE);
-  %}
-
-  enc_class enc_Array_Equals(rdi_RegP ary1, rsi_RegP ary2, regD tmp1, regD tmp2,
-                             rax_RegI tmp3, rbx_RegI tmp4, rcx_RegI result) %{
-    Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;
-    MacroAssembler masm(&cbuf);
-
-    XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
-    XMMRegister tmp2Reg   = as_XMMRegister($tmp2$$reg);
-    Register ary1Reg      = as_Register($ary1$$reg);
-    Register ary2Reg      = as_Register($ary2$$reg);
-    Register tmp3Reg      = as_Register($tmp3$$reg);
-    Register tmp4Reg      = as_Register($tmp4$$reg);
-    Register resultReg    = as_Register($result$$reg);
-
-    int length_offset  = arrayOopDesc::length_offset_in_bytes();
-    int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);
-
-    // Check the input args
-    masm.cmpq(ary1Reg, ary2Reg);
-    masm.jcc(Assembler::equal, TRUE_LABEL);
-    masm.testq(ary1Reg, ary1Reg);
-    masm.jcc(Assembler::zero, FALSE_LABEL);
-    masm.testq(ary2Reg, ary2Reg);
-    masm.jcc(Assembler::zero, FALSE_LABEL);
-
-    // Check the lengths
-    masm.movl(tmp4Reg, Address(ary1Reg, length_offset));
-    masm.movl(resultReg, Address(ary2Reg, length_offset));
-    masm.cmpl(tmp4Reg, resultReg);
-    masm.jcc(Assembler::notEqual, FALSE_LABEL);
-    masm.testl(resultReg, resultReg);
-    masm.jcc(Assembler::zero, TRUE_LABEL);
-
-    //load array address
-    masm.lea(ary1Reg, Address(ary1Reg, base_offset));
-    masm.lea(ary2Reg, Address(ary2Reg, base_offset));
-
-    //set byte count
-    masm.shll(tmp4Reg, 1);
-    masm.movl(resultReg,tmp4Reg);
-
-    if (UseSSE42Intrinsics){
-      // With SSE4.2, use double quad vector compare
-      Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
-      // Compare 16-byte vectors
-      masm.andl(tmp4Reg, 0xfffffff0);    // vector count (in bytes)
-      masm.andl(resultReg, 0x0000000e);  // tail count (in bytes)
-      masm.testl(tmp4Reg, tmp4Reg);
-      masm.jccb(Assembler::zero, COMPARE_TAIL);
-      masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
-      masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
-      masm.negptr(tmp4Reg);
-
-      masm.bind(COMPARE_WIDE_VECTORS);
-      masm.movdqu(tmp1Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
-      masm.movdqu(tmp2Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
-      masm.pxor(tmp1Reg, tmp2Reg);
-      masm.ptest(tmp1Reg, tmp1Reg);
-
-      masm.jccb(Assembler::notZero, FALSE_LABEL);
-      masm.addptr(tmp4Reg, 16);
-      masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
-      masm.bind(COMPARE_TAIL);
-      masm.movl(tmp4Reg, resultReg);
-      // Fallthru to tail compare
-    }
-
-   // Compare 4-byte vectors
-    masm.andl(tmp4Reg, 0xfffffffc);    // vector count (in bytes)
-    masm.andl(resultReg, 0x00000002);  // tail char (in bytes)
-    masm.testl(tmp4Reg, tmp4Reg); //if tmp2 == 0, only compare char
-    masm.jccb(Assembler::zero, COMPARE_CHAR);
-    masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
-    masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
-    masm.negptr(tmp4Reg);
-
-    masm.bind(COMPARE_VECTORS);
-    masm.movl(tmp3Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
-    masm.cmpl(tmp3Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
-    masm.jccb(Assembler::notEqual, FALSE_LABEL);
-    masm.addptr(tmp4Reg, 4);
-    masm.jcc(Assembler::notZero, COMPARE_VECTORS);
-
-    // Compare trailing char (final 2 bytes), if any
-    masm.bind(COMPARE_CHAR);
-    masm.testl(resultReg, resultReg);
-    masm.jccb(Assembler::zero, TRUE_LABEL);
-    masm.load_unsigned_short(tmp3Reg, Address(ary1Reg, 0));
-    masm.load_unsigned_short(tmp4Reg, Address(ary2Reg, 0));
-    masm.cmpl(tmp3Reg, tmp4Reg);
-    masm.jccb(Assembler::notEqual, FALSE_LABEL);
-
-    masm.bind(TRUE_LABEL);
-    masm.movl(resultReg, 1);   // return true
-    masm.jmpb(DONE);
-
-    masm.bind(FALSE_LABEL);
-    masm.xorl(resultReg, resultReg); // return false
-
-    // That's it
-    masm.bind(DONE);
-  %}
 
   enc_class enc_rethrow()
   %{
@@ -12096,52 +11654,67 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct string_compare(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2,
-                        rax_RegI tmp3, rbx_RegI tmp4, rcx_RegI result, rFlagsReg cr)
-%{
-  match(Set result (StrComp str1 str2));
-  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, KILL tmp3, KILL tmp4, KILL cr);
-  //ins_cost(300);
-
-  format %{ "String Compare $str1, $str2 -> $result    // XXX KILL RAX, RBX" %}
-  ins_encode( enc_String_Compare(str1, str2, tmp1, tmp2, tmp3, tmp4, result) );
+instruct string_compare(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rbx_RegI cnt2,
+                        rax_RegI result, regD tmp1, regD tmp2, rFlagsReg cr)
+%{
+  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
+
+  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1, $tmp2" %}
+  ins_encode %{
+    __ string_compare($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
+                      $tmp1$$XMMRegister, $tmp2$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
-instruct string_indexof(rsi_RegP str1, rdi_RegP str2, regD tmp1, rax_RegI tmp2,
-                        rcx_RegI tmp3, rdx_RegI tmp4, rbx_RegI result, rFlagsReg cr)
+instruct string_indexof(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
+                        rbx_RegI result, regD tmp1, rcx_RegI tmp2, rFlagsReg cr)
 %{
   predicate(UseSSE42Intrinsics);
-  match(Set result (StrIndexOf str1 str2));
-  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, KILL tmp2, KILL tmp3, KILL tmp4, KILL cr);
-
-  format %{ "String IndexOf $str1,$str2 -> $result   // KILL RAX, RCX, RDX" %}
-  ins_encode( enc_String_IndexOf(str1, str2, tmp1, tmp2, tmp3, tmp4, result) );
+  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp2, KILL cr);
+
+  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1, $tmp2" %}
+  ins_encode %{
+    __ string_indexof($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
+                      $tmp1$$XMMRegister, $tmp2$$Register);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // fast string equals
-instruct string_equals(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2, rbx_RegI tmp3,
-                       rcx_RegI tmp4, rax_RegI result, rFlagsReg cr)
-%{
-  match(Set result (StrEquals str1 str2));
-  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, KILL tmp3, KILL tmp4, KILL cr);
-
-  format %{ "String Equals $str1,$str2 -> $result    // KILL RBX, RCX" %}
-  ins_encode( enc_String_Equals(str1, str2, tmp1, tmp2, tmp3, tmp4, result) );
+instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
+                       regD tmp1, regD tmp2, rbx_RegI tmp3, rFlagsReg cr)
+%{
+  match(Set result (StrEquals (Binary str1 str2) cnt));
+  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
+
+  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
+  ins_encode %{
+    __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
+                          $cnt$$Register, $result$$Register, $tmp3$$Register,
+                          $tmp1$$XMMRegister, $tmp2$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // fast array equals
-instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, regD tmp1, regD tmp2, rax_RegI tmp3,
-                      rbx_RegI tmp4, rcx_RegI result, rFlagsReg cr)
+instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
+                      regD tmp1, regD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
 %{
   match(Set result (AryEq ary1 ary2));
   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
   //ins_cost(300);
 
-  format %{ "Array Equals $ary1,$ary2 -> $result   // KILL RAX, RBX" %}
-  ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, tmp3, tmp4, result) );
+  format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
+  ins_encode %{
+    __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
+                          $tmp3$$Register, $result$$Register, $tmp4$$Register,
+                          $tmp1$$XMMRegister, $tmp2$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
--- a/hotspot/src/share/vm/adlc/formssel.cpp	Mon Sep 14 09:49:54 2009 -0700
+++ b/hotspot/src/share/vm/adlc/formssel.cpp	Mon Sep 14 12:14:20 2009 -0700
@@ -828,11 +828,13 @@
     return AdlcVMDeps::Parms;   // Skip the machine-state edges
 
   if( _matrule->_rChild &&
-      ( strcmp(_matrule->_rChild->_opType,"StrComp"   )==0 ||
+      ( strcmp(_matrule->_rChild->_opType,"AryEq"     )==0 ||
+        strcmp(_matrule->_rChild->_opType,"StrComp"   )==0 ||
         strcmp(_matrule->_rChild->_opType,"StrEquals" )==0 ||
         strcmp(_matrule->_rChild->_opType,"StrIndexOf")==0 )) {
-        // String.(compareTo/equals/indexOf) take 1 control and 4 memory edges.
-    return 5;
+        // String.(compareTo/equals/indexOf) and Arrays.equals
+        // take 1 control and 1 memory edges.
+    return 2;
   }
 
   // Check for handling of 'Memory' input/edge in the ideal world.
--- a/hotspot/src/share/vm/opto/library_call.cpp	Mon Sep 14 09:49:54 2009 -0700
+++ b/hotspot/src/share/vm/opto/library_call.cpp	Mon Sep 14 12:14:20 2009 -0700
@@ -133,6 +133,7 @@
     return generate_method_call(method_id, true, false);
   }
 
+  Node* make_string_method_node(int opcode, Node* str1, Node* cnt1, Node* str2, Node* cnt2);
   bool inline_string_compareTo();
   bool inline_string_indexOf();
   Node* string_indexOf(Node* string_object, ciTypeArray* target_array, jint offset, jint cache_i, jint md2_i);
@@ -796,6 +797,64 @@
 }
 
 
+//------------------------------make_string_method_node------------------------
+// Helper method for String intrinsic finctions.
+Node* LibraryCallKit::make_string_method_node(int opcode, Node* str1, Node* cnt1, Node* str2, Node* cnt2) {
+  const int value_offset  = java_lang_String::value_offset_in_bytes();
+  const int count_offset  = java_lang_String::count_offset_in_bytes();
+  const int offset_offset = java_lang_String::offset_offset_in_bytes();
+
+  Node* no_ctrl = NULL;
+
+  ciInstanceKlass* klass = env()->String_klass();
+  const TypeInstPtr* string_type =
+        TypeInstPtr::make(TypePtr::BotPTR, klass, false, NULL, 0);
+
+  const TypeAryPtr* value_type =
+        TypeAryPtr::make(TypePtr::NotNull,
+                         TypeAry::make(TypeInt::CHAR,TypeInt::POS),
+                         ciTypeArrayKlass::make(T_CHAR), true, 0);
+
+  // Get start addr of string and substring
+  Node* str1_valuea  = basic_plus_adr(str1, str1, value_offset);
+  Node* str1_value   = make_load(no_ctrl, str1_valuea, value_type, T_OBJECT, string_type->add_offset(value_offset));
+  Node* str1_offseta = basic_plus_adr(str1, str1, offset_offset);
+  Node* str1_offset  = make_load(no_ctrl, str1_offseta, TypeInt::INT, T_INT, string_type->add_offset(offset_offset));
+  Node* str1_start   = array_element_address(str1_value, str1_offset, T_CHAR);
+
+  // Pin loads from String::equals() argument since it could be NULL.
+  Node* str2_ctrl = (opcode == Op_StrEquals) ? control() : no_ctrl;
+  Node* str2_valuea  = basic_plus_adr(str2, str2, value_offset);
+  Node* str2_value   = make_load(str2_ctrl, str2_valuea, value_type, T_OBJECT, string_type->add_offset(value_offset));
+  Node* str2_offseta = basic_plus_adr(str2, str2, offset_offset);
+  Node* str2_offset  = make_load(str2_ctrl, str2_offseta, TypeInt::INT, T_INT, string_type->add_offset(offset_offset));
+  Node* str2_start   = array_element_address(str2_value, str2_offset, T_CHAR);
+
+  Node* result = NULL;
+  switch (opcode) {
+  case Op_StrIndexOf:
+    result = new (C, 6) StrIndexOfNode(control(), memory(TypeAryPtr::CHARS),
+                                       str1_start, cnt1, str2_start, cnt2);
+    break;
+  case Op_StrComp:
+    result = new (C, 6) StrCompNode(control(), memory(TypeAryPtr::CHARS),
+                                    str1_start, cnt1, str2_start, cnt2);
+    break;
+  case Op_StrEquals:
+    result = new (C, 5) StrEqualsNode(control(), memory(TypeAryPtr::CHARS),
+                                      str1_start, str2_start, cnt1);
+    break;
+  default:
+    ShouldNotReachHere();
+    return NULL;
+  }
+
+  // All these intrinsics have checks.
+  C->set_has_split_ifs(true); // Has chance for split-if optimization
+
+  return _gvn.transform(result);
+}
+
 //------------------------------inline_string_compareTo------------------------
 bool LibraryCallKit::inline_string_compareTo() {
 
@@ -824,16 +883,16 @@
   ciInstanceKlass* klass = env()->String_klass();
   const TypeInstPtr* string_type =
     TypeInstPtr::make(TypePtr::BotPTR, klass, false, NULL, 0);
-
-  Node* compare =
-    _gvn.transform(new (C, 7) StrCompNode(
-                        control(),
-                        memory(TypeAryPtr::CHARS),
-                        memory(string_type->add_offset(value_offset)),
-                        memory(string_type->add_offset(count_offset)),
-                        memory(string_type->add_offset(offset_offset)),
-                        receiver,
-                        argument));
+  Node* no_ctrl = NULL;
+
+  // Get counts for string and argument
+  Node* receiver_cnta = basic_plus_adr(receiver, receiver, count_offset);
+  Node* receiver_cnt  = make_load(no_ctrl, receiver_cnta, TypeInt::INT, T_INT, string_type->add_offset(count_offset));
+
+  Node* argument_cnta = basic_plus_adr(argument, argument, count_offset);
+  Node* argument_cnt  = make_load(no_ctrl, argument_cnta, TypeInt::INT, T_INT, string_type->add_offset(count_offset));
+
+  Node* compare = make_string_method_node(Op_StrComp, receiver, receiver_cnt, argument, argument_cnt);
   push(compare);
   return true;
 }
@@ -865,45 +924,71 @@
     return true;
   }
 
+  // paths (plus control) merge
+  RegionNode* region = new (C, 5) RegionNode(5);
+  Node* phi = new (C, 5) PhiNode(region, TypeInt::BOOL);
+
+  // does source == target string?
+  Node* cmp = _gvn.transform(new (C, 3) CmpPNode(receiver, argument));
+  Node* bol = _gvn.transform(new (C, 2) BoolNode(cmp, BoolTest::eq));
+
+  Node* if_eq = generate_slow_guard(bol, NULL);
+  if (if_eq != NULL) {
+    // receiver == argument
+    phi->init_req(2, intcon(1));
+    region->init_req(2, if_eq);
+  }
+
   // get String klass for instanceOf
   ciInstanceKlass* klass = env()->String_klass();
 
-  // two paths (plus control) merge
-  RegionNode* region = new (C, 3) RegionNode(3);
-  Node* phi = new (C, 3) PhiNode(region, TypeInt::BOOL);
-
-  Node* inst = gen_instanceof(argument, makecon(TypeKlassPtr::make(klass)));
-  Node* cmp  = _gvn.transform(new (C, 3) CmpINode(inst, intcon(1)));
-  Node* bol  = _gvn.transform(new (C, 2) BoolNode(cmp, BoolTest::eq));
-
-  IfNode* iff = create_and_map_if(control(), bol, PROB_MAX, COUNT_UNKNOWN);
-
-  Node* if_true  = _gvn.transform(new (C, 1) IfTrueNode(iff));
-  set_control(if_true);
+  if (!stopped()) {
+    Node* inst = gen_instanceof(argument, makecon(TypeKlassPtr::make(klass)));
+    Node* cmp  = _gvn.transform(new (C, 3) CmpINode(inst, intcon(1)));
+    Node* bol  = _gvn.transform(new (C, 2) BoolNode(cmp, BoolTest::ne));
+
+    Node* inst_false = generate_guard(bol, NULL, PROB_MIN);
+    //instanceOf == true, fallthrough
+
+    if (inst_false != NULL) {
+      phi->init_req(3, intcon(0));
+      region->init_req(3, inst_false);
+    }
+  }
 
   const TypeInstPtr* string_type =
     TypeInstPtr::make(TypePtr::BotPTR, klass, false, NULL, 0);
 
-  // instanceOf == true
-  Node* equals =
-    _gvn.transform(new (C, 7) StrEqualsNode(
-                        control(),
-                        memory(TypeAryPtr::CHARS),
-                        memory(string_type->add_offset(value_offset)),
-                        memory(string_type->add_offset(count_offset)),
-                        memory(string_type->add_offset(offset_offset)),
-                        receiver,
-                        argument));
-
-  phi->init_req(1, _gvn.transform(equals));
-  region->init_req(1, if_true);
-
-  //instanceOf == false, fallthrough
-  Node* if_false = _gvn.transform(new (C, 1) IfFalseNode(iff));
-  set_control(if_false);
-
-  phi->init_req(2, _gvn.transform(intcon(0)));
-  region->init_req(2, if_false);
+  Node* no_ctrl = NULL;
+  Node* receiver_cnt;
+  Node* argument_cnt;
+
+  if (!stopped()) {
+    // Get counts for string and argument
+    Node* receiver_cnta = basic_plus_adr(receiver, receiver, count_offset);
+    receiver_cnt  = make_load(no_ctrl, receiver_cnta, TypeInt::INT, T_INT, string_type->add_offset(count_offset));
+
+    // Pin load from argument string since it could be NULL.
+    Node* argument_cnta = basic_plus_adr(argument, argument, count_offset);
+    argument_cnt  = make_load(control(), argument_cnta, TypeInt::INT, T_INT, string_type->add_offset(count_offset));
+
+    // Check for receiver count != argument count
+    Node* cmp = _gvn.transform( new(C, 3) CmpINode(receiver_cnt, argument_cnt) );
+    Node* bol = _gvn.transform( new(C, 2) BoolNode(cmp, BoolTest::ne) );
+    Node* if_ne = generate_slow_guard(bol, NULL);
+    if (if_ne != NULL) {
+      phi->init_req(4, intcon(0));
+      region->init_req(4, if_ne);
+    }
+  }
+
+  // Check for count == 0 is done by mach node StrEquals.
+
+  if (!stopped()) {
+    Node* equals = make_string_method_node(Op_StrEquals, receiver, receiver_cnt, argument, argument_cnt);
+    phi->init_req(1, equals);
+    region->init_req(1, control());
+  }
 
   // post merge
   set_control(_gvn.transform(region));
@@ -924,10 +1009,8 @@
   Node *argument1 = pop();
 
   Node* equals =
-    _gvn.transform(new (C, 3) AryEqNode(control(),
-                                        argument1,
-                                        argument2)
-                   );
+    _gvn.transform(new (C, 4) AryEqNode(control(), memory(TypeAryPtr::CHARS),
+                                        argument1, argument2) );
   push(equals);
   return true;
 }
@@ -1108,19 +1191,40 @@
       return true;
     }
 
+    // Make the merge point
+    RegionNode* result_rgn = new (C, 3) RegionNode(3);
+    Node*       result_phi = new (C, 3) PhiNode(result_rgn, TypeInt::INT);
+    Node* no_ctrl  = NULL;
+
     ciInstanceKlass* klass = env()->String_klass();
     const TypeInstPtr* string_type =
       TypeInstPtr::make(TypePtr::BotPTR, klass, false, NULL, 0);
 
-    result =
-      _gvn.transform(new (C, 7)
-                     StrIndexOfNode(control(),
-                                    memory(TypeAryPtr::CHARS),
-                                    memory(string_type->add_offset(value_offset)),
-                                    memory(string_type->add_offset(count_offset)),
-                                    memory(string_type->add_offset(offset_offset)),
-                                    receiver,
-                                    argument));
+    // Get counts for string and substr
+    Node* source_cnta = basic_plus_adr(receiver, receiver, count_offset);
+    Node* source_cnt  = make_load(no_ctrl, source_cnta, TypeInt::INT, T_INT, string_type->add_offset(count_offset));
+
+    Node* substr_cnta = basic_plus_adr(argument, argument, count_offset);
+    Node* substr_cnt  = make_load(no_ctrl, substr_cnta, TypeInt::INT, T_INT, string_type->add_offset(count_offset));
+
+    // Check for substr count > string count
+    Node* cmp = _gvn.transform( new(C, 3) CmpINode(substr_cnt, source_cnt) );
+    Node* bol = _gvn.transform( new(C, 2) BoolNode(cmp, BoolTest::gt) );
+    Node* if_gt = generate_slow_guard(bol, NULL);
+    if (if_gt != NULL) {
+      result_phi->init_req(2, intcon(-1));
+      result_rgn->init_req(2, if_gt);
+    }
+
+    if (!stopped()) {
+      result = make_string_method_node(Op_StrIndexOf, receiver, source_cnt, argument, substr_cnt);
+      result_phi->init_req(1, result);
+      result_rgn->init_req(1, control());
+    }
+    set_control(_gvn.transform(result_rgn));
+    record_for_igvn(result_rgn);
+    result = _gvn.transform(result_phi);
+
   } else { //Use LibraryCallKit::string_indexOf
     // don't intrinsify is argument isn't a constant string.
     if (!argument->is_Con()) {
--- a/hotspot/src/share/vm/opto/matcher.cpp	Mon Sep 14 09:49:54 2009 -0700
+++ b/hotspot/src/share/vm/opto/matcher.cpp	Mon Sep 14 12:14:20 2009 -0700
@@ -2032,6 +2032,23 @@
         n->del_req(3);
         break;
       }
+      case Op_StrEquals: {
+        Node *pair1 = new (C, 3) BinaryNode(n->in(2),n->in(3));
+        n->set_req(2,pair1);
+        n->set_req(3,n->in(4));
+        n->del_req(4);
+        break;
+      }
+      case Op_StrComp:
+      case Op_StrIndexOf: {
+        Node *pair1 = new (C, 3) BinaryNode(n->in(2),n->in(3));
+        n->set_req(2,pair1);
+        Node *pair2 = new (C, 3) BinaryNode(n->in(4),n->in(5));
+        n->set_req(3,pair2);
+        n->del_req(5);
+        n->del_req(4);
+        break;
+      }
       default:
         break;
       }
--- a/hotspot/src/share/vm/opto/memnode.cpp	Mon Sep 14 09:49:54 2009 -0700
+++ b/hotspot/src/share/vm/opto/memnode.cpp	Mon Sep 14 12:14:20 2009 -0700
@@ -2514,7 +2514,7 @@
 //=============================================================================
 // Do we match on this edge? No memory edges
 uint StrCompNode::match_edge(uint idx) const {
-  return idx == 5 || idx == 6;
+  return idx == 2 || idx == 3; // StrComp (Binary str1 cnt1) (Binary str2 cnt2)
 }
 
 //------------------------------Ideal------------------------------------------
@@ -2524,9 +2524,10 @@
   return remove_dead_region(phase, can_reshape) ? this : NULL;
 }
 
+//=============================================================================
 // Do we match on this edge? No memory edges
 uint StrEqualsNode::match_edge(uint idx) const {
-  return idx == 5 || idx == 6;
+  return idx == 2 || idx == 3; // StrEquals (Binary str1 str2) cnt
 }
 
 //------------------------------Ideal------------------------------------------
@@ -2539,7 +2540,7 @@
 //=============================================================================
 // Do we match on this edge? No memory edges
 uint StrIndexOfNode::match_edge(uint idx) const {
-  return idx == 5 || idx == 6;
+  return idx == 2 || idx == 3; // StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)
 }
 
 //------------------------------Ideal------------------------------------------
@@ -2549,6 +2550,11 @@
   return remove_dead_region(phase, can_reshape) ? this : NULL;
 }
 
+//=============================================================================
+// Do we match on this edge? No memory edges
+uint AryEqNode::match_edge(uint idx) const {
+  return idx == 2 || idx == 3; // StrEquals ary1 ary2
+}
 //------------------------------Ideal------------------------------------------
 // Return a node which is more "ideal" than the current node.  Strip out
 // control copies
--- a/hotspot/src/share/vm/opto/memnode.hpp	Mon Sep 14 09:49:54 2009 -0700
+++ b/hotspot/src/share/vm/opto/memnode.hpp	Mon Sep 14 12:14:20 2009 -0700
@@ -748,22 +748,15 @@
 //------------------------------StrComp-------------------------------------
 class StrCompNode: public Node {
 public:
-  StrCompNode(Node *control,
-              Node* char_array_mem,
-              Node* value_mem,
-              Node* count_mem,
-              Node* offset_mem,
-              Node* s1, Node* s2): Node(control,
-                                        char_array_mem,
-                                        value_mem,
-                                        count_mem,
-                                        offset_mem,
-                                        s1, s2) {};
+  StrCompNode(Node* control, Node* char_array_mem,
+              Node* s1, Node* c1,
+              Node* s2, Node* c2): Node(control, char_array_mem,
+                                        s1, c1,
+                                        s2, c2) {};
   virtual int Opcode() const;
   virtual bool depends_only_on_test() const { return false; }
   virtual const Type* bottom_type() const { return TypeInt::INT; }
-  // a StrCompNode (conservatively) aliases with everything:
-  virtual const TypePtr* adr_type() const { return TypePtr::BOTTOM; }
+  virtual const TypePtr* adr_type() const { return TypeAryPtr::CHARS; }
   virtual uint match_edge(uint idx) const;
   virtual uint ideal_reg() const { return Op_RegI; }
   virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
@@ -772,22 +765,13 @@
 //------------------------------StrEquals-------------------------------------
 class StrEqualsNode: public Node {
 public:
-  StrEqualsNode(Node *control,
-                Node* char_array_mem,
-                Node* value_mem,
-                Node* count_mem,
-                Node* offset_mem,
-                Node* s1, Node* s2): Node(control,
-                                          char_array_mem,
-                                          value_mem,
-                                          count_mem,
-                                          offset_mem,
-                                          s1, s2) {};
+  StrEqualsNode(Node* control, Node* char_array_mem,
+                Node* s1, Node* s2, Node* c): Node(control, char_array_mem,
+                                                   s1, s2, c) {};
   virtual int Opcode() const;
   virtual bool depends_only_on_test() const { return false; }
   virtual const Type* bottom_type() const { return TypeInt::BOOL; }
-  // a StrEqualsNode (conservatively) aliases with everything:
-  virtual const TypePtr* adr_type() const { return TypePtr::BOTTOM; }
+  virtual const TypePtr* adr_type() const { return TypeAryPtr::CHARS; }
   virtual uint match_edge(uint idx) const;
   virtual uint ideal_reg() const { return Op_RegI; }
   virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
@@ -796,22 +780,15 @@
 //------------------------------StrIndexOf-------------------------------------
 class StrIndexOfNode: public Node {
 public:
-  StrIndexOfNode(Node *control,
-                 Node* char_array_mem,
-                 Node* value_mem,
-                 Node* count_mem,
-                 Node* offset_mem,
-                 Node* s1, Node* s2): Node(control,
-                                           char_array_mem,
-                                           value_mem,
-                                           count_mem,
-                                           offset_mem,
-                                           s1, s2) {};
+  StrIndexOfNode(Node* control, Node* char_array_mem,
+                 Node* s1, Node* c1,
+                 Node* s2, Node* c2): Node(control, char_array_mem,
+                                           s1, c1,
+                                           s2, c2) {};
   virtual int Opcode() const;
   virtual bool depends_only_on_test() const { return false; }
   virtual const Type* bottom_type() const { return TypeInt::INT; }
-  // a StrIndexOfNode (conservatively) aliases with everything:
-  virtual const TypePtr* adr_type() const { return TypePtr::BOTTOM; }
+  virtual const TypePtr* adr_type() const { return TypeAryPtr::CHARS; }
   virtual uint match_edge(uint idx) const;
   virtual uint ideal_reg() const { return Op_RegI; }
   virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
@@ -820,11 +797,13 @@
 //------------------------------AryEq---------------------------------------
 class AryEqNode: public Node {
 public:
-  AryEqNode(Node *control, Node* s1, Node* s2): Node(control, s1, s2) {};
+  AryEqNode(Node* control, Node* char_array_mem,
+            Node* s1, Node* s2): Node(control, char_array_mem, s1, s2) {};
   virtual int Opcode() const;
   virtual bool depends_only_on_test() const { return false; }
   virtual const Type* bottom_type() const { return TypeInt::BOOL; }
   virtual const TypePtr* adr_type() const { return TypeAryPtr::CHARS; }
+  virtual uint match_edge(uint idx) const;
   virtual uint ideal_reg() const { return Op_RegI; }
   virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
 };
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/6875866/Test.java	Mon Sep 14 12:14:20 2009 -0700
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/**
+ * @test
+ * @bug 6875866
+ * @summary Intrinsic for String.indexOf() is broken on x86 with SSE4.2
+ *
+ * @run main/othervm -Xcomp Test
+ */
+
+public class Test {
+
+  static int IndexOfTest(String str) {
+    return str.indexOf("11111xx1x");
+  }
+
+  public static void main(String args[]) {
+    String str = "11111xx11111xx1x";
+    int idx = IndexOfTest(str);
+    System.out.println("IndexOf = " + idx);
+    if (idx != 7) {
+      System.exit(97);
+    }
+  }
+}