Merge
author vlivanov
Fri, 27 May 2016 14:49:05 +0000
changeset 38718 b42f325b8f7b
parent 38709 835ea8e8b295 (current diff)
parent 38717 aa8919c796e4 (diff)
child 38721 7da36c5eb9da
Merge
--- a/hotspot/src/cpu/aarch64/vm/aarch64.ad	Fri May 27 05:49:24 2016 -0700
+++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad	Fri May 27 14:49:05 2016 +0000
@@ -3331,7 +3331,6 @@
 
   switch (opcode) {
   case Op_StrComp:
-  case Op_StrIndexOf:
     if (CompactStrings)  return false;
     break;
   default:
@@ -4744,7 +4743,8 @@
       __ br(Assembler::EQ, cont);
     } else {
       Label retry_load;
-      __ prfm(Address(oop), PSTL1STRM);
+      if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
+        __ prfm(Address(oop), PSTL1STRM);
       __ bind(retry_load);
       __ ldaxr(tmp, oop);
       __ cmp(tmp, disp_hdr);
@@ -4799,7 +4799,8 @@
         __ cmp(rscratch1, disp_hdr);
       } else {
         Label retry_load, fail;
-        __ prfm(Address(tmp), PSTL1STRM);
+        if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
+          __ prfm(Address(tmp), PSTL1STRM);
         __ bind(retry_load);
         __ ldaxr(rscratch1, tmp);
         __ cmp(disp_hdr, rscratch1);
@@ -4893,7 +4894,8 @@
         __ cmp(tmp, box);
       } else {
         Label retry_load;
-        __ prfm(Address(oop), PSTL1STRM);
+        if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
+          __ prfm(Address(oop), PSTL1STRM);
         __ bind(retry_load);
         __ ldxr(tmp, oop);
         __ cmp(box, tmp);
@@ -14953,26 +14955,83 @@
   ins_pipe(pipe_class_memory);
 %}
 
-instruct string_indexof(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
+instruct string_indexofUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
        iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
 %{
   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
-  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result" %}
+  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %}
+
+  ins_encode %{
+    __ string_indexof($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register,
+                      $tmp1$$Register, $tmp2$$Register,
+                      $tmp3$$Register, $tmp4$$Register,
+                      -1, $result$$Register, StrIntrinsicNode::UU);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct string_indexofLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
+       iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
+%{
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
+  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
+         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
+  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %}
 
   ins_encode %{
     __ string_indexof($str1$$Register, $str2$$Register,
                       $cnt1$$Register, $cnt2$$Register,
                       $tmp1$$Register, $tmp2$$Register,
                       $tmp3$$Register, $tmp4$$Register,
-                      -1, $result$$Register);
+                      -1, $result$$Register, StrIntrinsicNode::LL);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct string_indexofUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
+       iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
+%{
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
+  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
+         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
+  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %}
+
+  ins_encode %{
+    __ string_indexof($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register,
+                      $tmp1$$Register, $tmp2$$Register,
+                      $tmp3$$Register, $tmp4$$Register,
+                      -1, $result$$Register, StrIntrinsicNode::UL);
   %}
   ins_pipe(pipe_class_memory);
 %}
 
-instruct string_indexof_con(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
+instruct string_indexofLU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
+       iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
+%{
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LU);
+  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
+         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
+  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LU)" %}
+
+  ins_encode %{
+    __ string_indexof($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register,
+                      $tmp1$$Register, $tmp2$$Register,
+                      $tmp3$$Register, $tmp4$$Register,
+                      -1, $result$$Register, StrIntrinsicNode::LU);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct string_indexof_conUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                  immI_le_4 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
                  iRegI tmp3, iRegI tmp4, rFlagsReg cr)
 %{
@@ -14980,7 +15039,28 @@
   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
-  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result" %}
+  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %}
+
+  ins_encode %{
+    int icnt2 = (int)$int_cnt2$$constant;
+    __ string_indexof($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, zr,
+                      $tmp1$$Register, $tmp2$$Register,
+                      $tmp3$$Register, $tmp4$$Register,
+                      icnt2, $result$$Register, StrIntrinsicNode::UU);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct string_indexof_conLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
+                 immI_le_4 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
+                 iRegI tmp3, iRegI tmp4, rFlagsReg cr)
+%{
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
+  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
+  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
+         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
+  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %}
 
   ins_encode %{
     int icnt2 = (int)$int_cnt2$$constant;
@@ -14988,7 +15068,49 @@
                       $cnt1$$Register, zr,
                       $tmp1$$Register, $tmp2$$Register,
                       $tmp3$$Register, $tmp4$$Register,
-                      icnt2, $result$$Register);
+                      icnt2, $result$$Register, StrIntrinsicNode::LL);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
+                 immI_1 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
+                 iRegI tmp3, iRegI tmp4, rFlagsReg cr)
+%{
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
+  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
+  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
+         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
+  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %}
+
+  ins_encode %{
+    int icnt2 = (int)$int_cnt2$$constant;
+    __ string_indexof($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, zr,
+                      $tmp1$$Register, $tmp2$$Register,
+                      $tmp3$$Register, $tmp4$$Register,
+                      icnt2, $result$$Register, StrIntrinsicNode::UL);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct string_indexof_conLU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
+                 immI_1 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
+                 iRegI tmp3, iRegI tmp4, rFlagsReg cr)
+%{
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LU);
+  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
+  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
+         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
+  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LU)" %}
+
+  ins_encode %{
+    int icnt2 = (int)$int_cnt2$$constant;
+    __ string_indexof($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, zr,
+                      $tmp1$$Register, $tmp2$$Register,
+                      $tmp3$$Register, $tmp4$$Register,
+                      icnt2, $result$$Register, StrIntrinsicNode::LU);
   %}
   ins_pipe(pipe_class_memory);
 %}
--- a/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.hpp	Fri May 27 05:49:24 2016 -0700
+++ b/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.hpp	Fri May 27 14:49:05 2016 +0000
@@ -23,8 +23,8 @@
  *
  */
 
-#ifndef CPU_X86_VM_C1_LIRASSEMBLER_X86_HPP
-#define CPU_X86_VM_C1_LIRASSEMBLER_X86_HPP
+#ifndef CPU_AARCH64_VM_C1_LIRASSEMBLER_AARCH64_HPP
+#define CPU_AARCH64_VM_C1_LIRASSEMBLER_AARCH64_HPP
 
 // ArrayCopyStub needs access to bailout
 friend class ArrayCopyStub;
@@ -78,4 +78,4 @@
        exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(175),
        deopt_handler_size = 7 * NativeInstruction::instruction_size };
 
-#endif // CPU_X86_VM_C1_LIRASSEMBLER_X86_HPP
+#endif // CPU_AARCH64_VM_C1_LIRASSEMBLER_AARCH64_HPP
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Fri May 27 05:49:24 2016 -0700
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Fri May 27 14:49:05 2016 +0000
@@ -1643,7 +1643,8 @@
     return;
   }
   Label retry_load;
-  prfm(Address(counter_addr), PSTL1STRM);
+  if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
+    prfm(Address(counter_addr), PSTL1STRM);
   bind(retry_load);
   // flush and load exclusive from the memory location
   ldxrw(tmp, counter_addr);
@@ -2084,7 +2085,8 @@
     membar(AnyAny);
   } else {
     Label retry_load, nope;
-    prfm(Address(addr), PSTL1STRM);
+    if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
+      prfm(Address(addr), PSTL1STRM);
     bind(retry_load);
     // flush and load exclusive from the memory location
     // and fail if it is not what we expect
@@ -2120,7 +2122,8 @@
     membar(AnyAny);
   } else {
     Label retry_load, nope;
-    prfm(Address(addr), PSTL1STRM);
+    if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
+      prfm(Address(addr), PSTL1STRM);
     bind(retry_load);
     // flush and load exclusive from the memory location
     // and fail if it is not what we expect
@@ -2155,7 +2158,8 @@
   } else {
     BLOCK_COMMENT("cmpxchg {");
     Label retry_load, done;
-    prfm(Address(addr), PSTL1STRM);
+    if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
+      prfm(Address(addr), PSTL1STRM);
     bind(retry_load);
     load_exclusive(tmp, addr, size, acquire);
     if (size == xword)
@@ -2194,7 +2198,8 @@
     result = different(prev, incr, addr) ? prev : rscratch2;            \
                                                                         \
   Label retry_load;                                                     \
-  prfm(Address(addr), PSTL1STRM);                                       \
+  if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))         \
+    prfm(Address(addr), PSTL1STRM);                                     \
   bind(retry_load);                                                     \
   LDXR(result, addr);                                                   \
   OP(rscratch1, result, incr);                                          \
@@ -2224,7 +2229,8 @@
     result = different(prev, newv, addr) ? prev : rscratch2;            \
                                                                         \
   Label retry_load;                                                     \
-  prfm(Address(addr), PSTL1STRM);                                       \
+  if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))         \
+    prfm(Address(addr), PSTL1STRM);                                     \
   bind(retry_load);                                                     \
   LDXR(result, addr);                                                   \
   STXR(rscratch1, newv, addr);                                          \
@@ -4136,13 +4142,14 @@
   }
 }
 
+typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);
 
 // Search for str1 in str2 and return index or -1
 void MacroAssembler::string_indexof(Register str2, Register str1,
                                     Register cnt2, Register cnt1,
                                     Register tmp1, Register tmp2,
                                     Register tmp3, Register tmp4,
-                                    int icnt1, Register result) {
+                                    int icnt1, Register result, int ae) {
   Label BM, LINEARSEARCH, DONE, NOMATCH, MATCH;
 
   Register ch1 = rscratch1;
@@ -4153,6 +4160,21 @@
   Register cnt2_neg = cnt2;
   Register result_tmp = tmp4;
 
+  bool isL = ae == StrIntrinsicNode::LL;
+
+  bool str1_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL;
+  bool str2_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU;
+  int str1_chr_shift = str1_isL ? 0:1;
+  int str2_chr_shift = str2_isL ? 0:1;
+  int str1_chr_size = str1_isL ? 1:2;
+  int str2_chr_size = str2_isL ? 1:2;
+  chr_insn str1_load_1chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb :
+                                      (chr_insn)&MacroAssembler::ldrh;
+  chr_insn str2_load_1chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :
+                                      (chr_insn)&MacroAssembler::ldrh;
+  chr_insn load_2chr = isL ? (chr_insn)&MacroAssembler::ldrh : (chr_insn)&MacroAssembler::ldrw;
+  chr_insn load_4chr = isL ? (chr_insn)&MacroAssembler::ldrw : (chr_insn)&MacroAssembler::ldr;
+
   // Note, inline_string_indexOf() generates checks:
   // if (substr.count > string.count) return -1;
   // if (substr.count == 0) return 0;
@@ -4242,7 +4264,7 @@
       mov(cnt1tmp, 0);
       sub(cnt1end, cnt1, 1);
     BIND(BCLOOP);
-      ldrh(ch1, Address(str1, cnt1tmp, Address::lsl(1)));
+      (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp, Address::lsl(str1_chr_shift)));
       cmp(ch1, 128);
       add(cnt1tmp, cnt1tmp, 1);
       br(HS, BCSKIP);
@@ -4254,36 +4276,36 @@
       mov(result_tmp, str2);
 
       sub(cnt2, cnt2, cnt1);
-      add(str2end, str2, cnt2, LSL, 1);
+      add(str2end, str2, cnt2, LSL, str2_chr_shift);
     BIND(BMLOOPSTR2);
       sub(cnt1tmp, cnt1, 1);
-      ldrh(ch1, Address(str1, cnt1tmp, Address::lsl(1)));
-      ldrh(skipch, Address(str2, cnt1tmp, Address::lsl(1)));
+      (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp, Address::lsl(str1_chr_shift)));
+      (this->*str2_load_1chr)(skipch, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
       cmp(ch1, skipch);
       br(NE, BMSKIP);
       subs(cnt1tmp, cnt1tmp, 1);
       br(LT, BMMATCH);
     BIND(BMLOOPSTR1);
-      ldrh(ch1, Address(str1, cnt1tmp, Address::lsl(1)));
-      ldrh(ch2, Address(str2, cnt1tmp, Address::lsl(1)));
+      (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp, Address::lsl(str1_chr_shift)));
+      (this->*str2_load_1chr)(ch2, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
       cmp(ch1, ch2);
       br(NE, BMSKIP);
       subs(cnt1tmp, cnt1tmp, 1);
       br(GE, BMLOOPSTR1);
     BIND(BMMATCH);
-      sub(result_tmp, str2, result_tmp);
-      lsr(result, result_tmp, 1);
+      sub(result, str2, result_tmp);
+      if (!str2_isL) lsr(result, result, 1);
       add(sp, sp, 128);
       b(DONE);
     BIND(BMADV);
-      add(str2, str2, 2);
+      add(str2, str2, str2_chr_size);
       b(BMCHECKEND);
     BIND(BMSKIP);
       cmp(skipch, 128);
       br(HS, BMADV);
       ldrb(ch2, Address(sp, skipch));
-      add(str2, str2, cnt1, LSL, 1);
-      sub(str2, str2, ch2, LSL, 1);
+      add(str2, str2, cnt1, LSL, str2_chr_shift);
+      sub(str2, str2, ch2, LSL, str2_chr_shift);
     BIND(BMCHECKEND);
       cmp(str2, str2end);
       br(LE, BMLOOPSTR2);
@@ -4300,119 +4322,113 @@
 
     if (icnt1 == -1)
     {
-        Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT, LAST_WORD;
-
-        cmp(cnt1, 4);
+        Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT;
+
+        cmp(cnt1, str1_isL == str2_isL ? 4 : 2);
         br(LT, DOSHORT);
 
         sub(cnt2, cnt2, cnt1);
-        sub(cnt1, cnt1, 4);
         mov(result_tmp, cnt2);
 
-        lea(str1, Address(str1, cnt1, Address::uxtw(1)));
-        lea(str2, Address(str2, cnt2, Address::uxtw(1)));
-        sub(cnt1_neg, zr, cnt1, LSL, 1);
-        sub(cnt2_neg, zr, cnt2, LSL, 1);
-        ldr(first, Address(str1, cnt1_neg));
+        lea(str1, Address(str1, cnt1, Address::lsl(str1_chr_shift)));
+        lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift)));
+        sub(cnt1_neg, zr, cnt1, LSL, str1_chr_shift);
+        sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift);
+        (this->*str1_load_1chr)(first, Address(str1, cnt1_neg));
 
       BIND(FIRST_LOOP);
-        ldr(ch2, Address(str2, cnt2_neg));
+        (this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg));
         cmp(first, ch2);
         br(EQ, STR1_LOOP);
       BIND(STR2_NEXT);
-        adds(cnt2_neg, cnt2_neg, 2);
+        adds(cnt2_neg, cnt2_neg, str2_chr_size);
         br(LE, FIRST_LOOP);
         b(NOMATCH);
 
       BIND(STR1_LOOP);
-        adds(cnt1tmp, cnt1_neg, 8);
-        add(cnt2tmp, cnt2_neg, 8);
-        br(GE, LAST_WORD);
+        adds(cnt1tmp, cnt1_neg, str1_chr_size);
+        add(cnt2tmp, cnt2_neg, str2_chr_size);
+        br(GE, MATCH);
 
       BIND(STR1_NEXT);
-        ldr(ch1, Address(str1, cnt1tmp));
-        ldr(ch2, Address(str2, cnt2tmp));
+        (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp));
+        (this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp));
         cmp(ch1, ch2);
         br(NE, STR2_NEXT);
-        adds(cnt1tmp, cnt1tmp, 8);
-        add(cnt2tmp, cnt2tmp, 8);
+        adds(cnt1tmp, cnt1tmp, str1_chr_size);
+        add(cnt2tmp, cnt2tmp, str2_chr_size);
         br(LT, STR1_NEXT);
-
-      BIND(LAST_WORD);
-        ldr(ch1, Address(str1));
-        sub(str2tmp, str2, cnt1_neg);         // adjust to corresponding
-        ldr(ch2, Address(str2tmp, cnt2_neg)); // word in str2
-        cmp(ch1, ch2);
-        br(NE, STR2_NEXT);
         b(MATCH);
 
       BIND(DOSHORT);
+      if (str1_isL == str2_isL) {
         cmp(cnt1, 2);
         br(LT, DO1);
         br(GT, DO3);
+      }
     }
 
     if (icnt1 == 4) {
       Label CH1_LOOP;
 
-        ldr(ch1, str1);
+        (this->*load_4chr)(ch1, str1);
         sub(cnt2, cnt2, 4);
         mov(result_tmp, cnt2);
-        lea(str2, Address(str2, cnt2, Address::uxtw(1)));
-        sub(cnt2_neg, zr, cnt2, LSL, 1);
+        lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift)));
+        sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift);
 
       BIND(CH1_LOOP);
-        ldr(ch2, Address(str2, cnt2_neg));
+        (this->*load_4chr)(ch2, Address(str2, cnt2_neg));
         cmp(ch1, ch2);
         br(EQ, MATCH);
-        adds(cnt2_neg, cnt2_neg, 2);
+        adds(cnt2_neg, cnt2_neg, str2_chr_size);
         br(LE, CH1_LOOP);
         b(NOMATCH);
     }
 
-    if (icnt1 == -1 || icnt1 == 2) {
+    if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 2) {
       Label CH1_LOOP;
 
       BIND(DO2);
-        ldrw(ch1, str1);
+        (this->*load_2chr)(ch1, str1);
         sub(cnt2, cnt2, 2);
         mov(result_tmp, cnt2);
-        lea(str2, Address(str2, cnt2, Address::uxtw(1)));
-        sub(cnt2_neg, zr, cnt2, LSL, 1);
+        lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift)));
+        sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift);
 
       BIND(CH1_LOOP);
-        ldrw(ch2, Address(str2, cnt2_neg));
+        (this->*load_2chr)(ch2, Address(str2, cnt2_neg));
         cmp(ch1, ch2);
         br(EQ, MATCH);
-        adds(cnt2_neg, cnt2_neg, 2);
+        adds(cnt2_neg, cnt2_neg, str2_chr_size);
         br(LE, CH1_LOOP);
         b(NOMATCH);
     }
 
-    if (icnt1 == -1 || icnt1 == 3) {
+    if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 3) {
       Label FIRST_LOOP, STR2_NEXT, STR1_LOOP;
 
       BIND(DO3);
-        ldrw(first, str1);
-        ldrh(ch1, Address(str1, 4));
+        (this->*load_2chr)(first, str1);
+        (this->*str1_load_1chr)(ch1, Address(str1, 2*str1_chr_size));
 
         sub(cnt2, cnt2, 3);
         mov(result_tmp, cnt2);
-        lea(str2, Address(str2, cnt2, Address::uxtw(1)));
-        sub(cnt2_neg, zr, cnt2, LSL, 1);
+        lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift)));
+        sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift);
 
       BIND(FIRST_LOOP);
-        ldrw(ch2, Address(str2, cnt2_neg));
+        (this->*load_2chr)(ch2, Address(str2, cnt2_neg));
         cmpw(first, ch2);
         br(EQ, STR1_LOOP);
       BIND(STR2_NEXT);
-        adds(cnt2_neg, cnt2_neg, 2);
+        adds(cnt2_neg, cnt2_neg, str2_chr_size);
         br(LE, FIRST_LOOP);
         b(NOMATCH);
 
       BIND(STR1_LOOP);
-        add(cnt2tmp, cnt2_neg, 4);
-        ldrh(ch2, Address(str2, cnt2tmp));
+        add(cnt2tmp, cnt2_neg, 2*str2_chr_size);
+        (this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp));
         cmp(ch1, ch2);
         br(NE, STR2_NEXT);
         b(MATCH);
@@ -4423,24 +4439,31 @@
       Label DO1_SHORT, DO1_LOOP;
 
       BIND(DO1);
-        ldrh(ch1, str1);
-        cmp(cnt2, 4);
+        (this->*str1_load_1chr)(ch1, str1);
+        cmp(cnt2, 8);
         br(LT, DO1_SHORT);
 
+        if (str2_isL) {
+          if (!str1_isL) {
+            tst(ch1, 0xff00);
+            br(NE, NOMATCH);
+          }
+          orr(ch1, ch1, ch1, LSL, 8);
+        }
         orr(ch1, ch1, ch1, LSL, 16);
         orr(ch1, ch1, ch1, LSL, 32);
 
-        sub(cnt2, cnt2, 4);
+        sub(cnt2, cnt2, 8/str2_chr_size);
         mov(result_tmp, cnt2);
-        lea(str2, Address(str2, cnt2, Address::uxtw(1)));
-        sub(cnt2_neg, zr, cnt2, LSL, 1);
-
-        mov(tmp3, 0x0001000100010001);
+        lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift)));
+        sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift);
+
+        mov(tmp3, str2_isL ? 0x0101010101010101 : 0x0001000100010001);
       BIND(CH1_LOOP);
         ldr(ch2, Address(str2, cnt2_neg));
         eor(ch2, ch1, ch2);
         sub(tmp1, ch2, tmp3);
-        orr(tmp2, ch2, 0x7fff7fff7fff7fff);
+        orr(tmp2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);
         bics(tmp1, tmp1, tmp2);
         br(NE, HAS_ZERO);
         adds(cnt2_neg, cnt2_neg, 8);
@@ -4459,13 +4482,13 @@
 
       BIND(DO1_SHORT);
         mov(result_tmp, cnt2);
-        lea(str2, Address(str2, cnt2, Address::uxtw(1)));
-        sub(cnt2_neg, zr, cnt2, LSL, 1);
+        lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift)));
+        sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift);
       BIND(DO1_LOOP);
-        ldrh(ch2, Address(str2, cnt2_neg));
+        (this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg));
         cmpw(ch1, ch2);
         br(EQ, MATCH);
-        adds(cnt2_neg, cnt2_neg, 2);
+        adds(cnt2_neg, cnt2_neg, str2_chr_size);
         br(LT, DO1_LOOP);
     }
   }
@@ -4473,7 +4496,7 @@
     mov(result, -1);
     b(DONE);
   BIND(MATCH);
-    add(result, result_tmp, cnt2_neg, ASR, 1);
+    add(result, result_tmp, cnt2_neg, ASR, str2_chr_shift);
   BIND(DONE);
 }
 
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Fri May 27 05:49:24 2016 -0700
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Fri May 27 14:49:05 2016 +0000
@@ -545,6 +545,15 @@
     mrs(0b011, 0b0000, 0b0000, 0b111, reg);
   }
 
+  // CTR_EL0:   op1 == 011
+  //            CRn == 0000
+  //            CRm == 0000
+  //            op2 == 001
+  inline void get_ctr_el0(Register reg)
+  {
+    mrs(0b011, 0b0000, 0b0000, 0b001, reg);
+  }
+
   // idiv variant which deals with MINLONG as dividend and -1 as divisor
   int corrected_idivl(Register result, Register ra, Register rb,
                       bool want_remainder, Register tmp = rscratch1);
@@ -1217,7 +1226,7 @@
                       Register cnt1, Register cnt2,
                       Register tmp1, Register tmp2,
                       Register tmp3, Register tmp4,
-                      int int_cnt1, Register result);
+                      int int_cnt1, Register result, int ae);
 private:
   void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
                        Register src1, Register src2);
--- a/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp	Fri May 27 05:49:24 2016 -0700
+++ b/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp	Fri May 27 14:49:05 2016 +0000
@@ -105,6 +105,9 @@
     __ get_dczid_el0(rscratch1);
     __ strw(rscratch1, Address(c_rarg0, in_bytes(VM_Version::dczid_el0_offset())));
 
+    __ get_ctr_el0(rscratch1);
+    __ strw(rscratch1, Address(c_rarg0, in_bytes(VM_Version::ctr_el0_offset())));
+
     __ leave();
     __ ret(lr);
 
@@ -124,16 +127,20 @@
 
   getPsrInfo_stub(&_psr_info);
 
+  int dcache_line = VM_Version::dcache_line_size();
+
   if (FLAG_IS_DEFAULT(AllocatePrefetchDistance))
-    FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256);
+    FLAG_SET_DEFAULT(AllocatePrefetchDistance, 3*dcache_line);
   if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize))
-    FLAG_SET_DEFAULT(AllocatePrefetchStepSize, 64);
-  FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 256);
-  FLAG_SET_DEFAULT(PrefetchFieldsAhead, 256);
+    FLAG_SET_DEFAULT(AllocatePrefetchStepSize, dcache_line);
+  if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes))
+    FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 3*dcache_line);
   if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes))
-    FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 256);
-  if ((PrefetchCopyIntervalInBytes & 7) || (PrefetchCopyIntervalInBytes >= 32768)) {
-    warning("PrefetchCopyIntervalInBytes must be a multiple of 8 and < 32768");
+    FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 3*dcache_line);
+
+  if (PrefetchCopyIntervalInBytes != -1 &&
+       ((PrefetchCopyIntervalInBytes & 7) || (PrefetchCopyIntervalInBytes >= 32768))) {
+    warning("PrefetchCopyIntervalInBytes must be -1, or a multiple of 8 and < 32768");
     PrefetchCopyIntervalInBytes &= ~7;
     if (PrefetchCopyIntervalInBytes >= 32768)
       PrefetchCopyIntervalInBytes = 32760;
@@ -170,6 +177,7 @@
   // Enable vendor specific features
   if (_cpu == CPU_CAVIUM && _variant == 0) _features |= CPU_DMB_ATOMICS;
   if (_cpu == CPU_ARM && (_model == 0xd03 || _model2 == 0xd03)) _features |= CPU_A53MAC;
+  if (_cpu == CPU_ARM && (_model == 0xd07 || _model2 == 0xd07)) _features |= CPU_STXR_PREFETCH;
   // If an olde style /proc/cpuinfo (cpu_lines == 1) then if _model is an A57 (0xd07)
   // we assume the worst and assume we could be on a big little system and have
   // undisclosed A53 cores which we could be swapped to at any stage
--- a/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.hpp	Fri May 27 05:49:24 2016 -0700
+++ b/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.hpp	Fri May 27 14:49:05 2016 +0000
@@ -42,6 +42,7 @@
 
   struct PsrInfo {
     uint32_t dczid_el0;
+    uint32_t ctr_el0;
   };
   static PsrInfo _psr_info;
   static void get_processor_features();
@@ -78,6 +79,7 @@
     CPU_SHA2         = (1<<6),
     CPU_CRC32        = (1<<7),
     CPU_LSE          = (1<<8),
+    CPU_STXR_PREFETCH= (1 << 29),
     CPU_A53MAC       = (1 << 30),
     CPU_DMB_ATOMICS  = (1 << 31),
   };
@@ -88,6 +90,7 @@
   static int cpu_variant()                    { return _variant; }
   static int cpu_revision()                   { return _revision; }
   static ByteSize dczid_el0_offset() { return byte_offset_of(PsrInfo, dczid_el0); }
+  static ByteSize ctr_el0_offset()   { return byte_offset_of(PsrInfo, ctr_el0); }
   static bool is_zva_enabled() {
     // Check the DZP bit (bit 4) of dczid_el0 is zero
     // and block size (bit 0~3) is not zero.
@@ -98,6 +101,12 @@
     assert(is_zva_enabled(), "ZVA not available");
     return 4 << (_psr_info.dczid_el0 & 0xf);
   }
+  static int icache_line_size() {
+    return (1 << (_psr_info.ctr_el0 & 0x0f)) * 4;
+  }
+  static int dcache_line_size() {
+    return (1 << ((_psr_info.ctr_el0 >> 16) & 0x0f)) * 4;
+  }
 };
 
 #endif // CPU_AARCH64_VM_VM_VERSION_AARCH64_HPP
--- a/hotspot/src/share/vm/runtime/sharedRuntime.cpp	Fri May 27 05:49:24 2016 -0700
+++ b/hotspot/src/share/vm/runtime/sharedRuntime.cpp	Fri May 27 14:49:05 2016 +0000
@@ -3084,49 +3084,25 @@
 
 frame SharedRuntime::look_for_reserved_stack_annotated_method(JavaThread* thread, frame fr) {
   frame activation;
-  int decode_offset = 0;
-  nmethod* nm = NULL;
-  frame prv_fr = fr;
+  CompiledMethod* nm = NULL;
   int count = 1;
 
   assert(fr.is_java_frame(), "Must start on Java frame");
 
-  while (!fr.is_first_frame()) {
+  while (true) {
     Method* method = NULL;
-    // Compiled java method case.
-    if (decode_offset != 0) {
-      DebugInfoReadStream stream(nm, decode_offset);
-      decode_offset = stream.read_int();
-      method = (Method*)nm->metadata_at(stream.read_int());
+    if (fr.is_interpreted_frame()) {
+      method = fr.interpreter_frame_method();
     } else {
-      if (fr.is_first_java_frame()) break;
-      address pc = fr.pc();
-      prv_fr = fr;
-      if (fr.is_interpreted_frame()) {
-        method = fr.interpreter_frame_method();
-        fr = fr.java_sender();
-      } else {
-        CodeBlob* cb = fr.cb();
-        fr = fr.java_sender();
-        if (cb == NULL || !cb->is_nmethod()) {
-          continue;
-        }
-        nm = (nmethod*)cb;
-        if (nm->method()->is_native()) {
-          method = nm->method();
-        } else {
-          PcDesc* pd = nm->pc_desc_at(pc);
-          assert(pd != NULL, "PcDesc must not be NULL");
-          decode_offset = pd->scope_decode_offset();
-          // if decode_offset is not equal to 0, it will execute the
-          // "compiled java method case" at the beginning of the loop.
-          continue;
-        }
+      CodeBlob* cb = fr.cb();
+      if (cb != NULL && cb->is_compiled()) {
+        nm = cb->as_compiled_method();
+        method = nm->method();
       }
     }
-    if (method->has_reserved_stack_access()) {
+    if ((method != NULL) && method->has_reserved_stack_access()) {
       ResourceMark rm(thread);
-      activation = prv_fr;
+      activation = fr;
       warning("Potentially dangerous stack overflow in "
               "ReservedStackAccess annotated method %s [%d]",
               method->name_and_sig_as_C_string(), count++);
@@ -3136,6 +3112,11 @@
         event.commit();
       }
     }
+    if (fr.is_first_java_frame()) {
+      break;
+    } else {
+      fr = fr.java_sender();
+    }
   }
   return activation;
 }
--- a/hotspot/test/compiler/compilercontrol/share/MultiCommand.java	Fri May 27 05:49:24 2016 -0700
+++ b/hotspot/test/compiler/compilercontrol/share/MultiCommand.java	Fri May 27 14:49:05 2016 +0000
@@ -73,6 +73,7 @@
     public void test() {
         Scenario.Builder builder = Scenario.getBuilder();
         builder.addFlag("-Xmixed");
+        builder.addFlag("-XX:CompilerDirectivesLimit=101");
         for (CompileCommand cc : testCases) {
             cc.print();
             builder.add(cc);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/onSpinWait/TestOnSpinWaitEnableDisable.java	Fri May 27 14:49:05 2016 +0000
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2016 Azul Systems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test TestOnSpinWaitEnableDisable
+ * @summary Test to ensure basic functioning of java.lang.Thread.onSpinWait
+ * @bug 8157683
+ * @run main TestOnSpinWaitEnableDisable
+ * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:DisableIntrinsic=_onSpinWait TestOnSpinWaitEnableDisable
+ */
+
+public class TestOnSpinWaitEnableDisable { // smoke test: only invokes Thread.onSpinWait(), makes no assertions
+    public static void main(String[] args) { // entry point for both @run lines in the jtreg header
+        for (int i = 0; i < 50_000; i++) { // presumably enough iterations to get this loop JIT-compiled -- TODO confirm
+            java.lang.Thread.onSpinWait(); // return value is void; the test only checks that the call completes
+        }
+    }
+}
--- a/hotspot/test/testlibrary/jittester/src/jdk/test/lib/jittester/jtreg/JitTesterDriver.java	Fri May 27 05:49:24 2016 -0700
+++ b/hotspot/test/testlibrary/jittester/src/jdk/test/lib/jittester/jtreg/JitTesterDriver.java	Fri May 27 14:49:05 2016 +0000
@@ -33,6 +33,7 @@
 import java.nio.file.Paths;
 import java.util.Arrays;
 import java.util.function.Predicate;
+import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
@@ -52,14 +53,16 @@
             throw new Error("Unexpected exception on test jvm start :" + e, e);
         }
 
+        Pattern splitOut = Pattern.compile("\\n"); // tests use \n only in stdout
+        Pattern splitErr = Pattern.compile("\\r?\\n"); // can handle both \r\n and \n
         Path testDir = Paths.get(Utils.TEST_SRC);
         String goldOut = formatOutput(streamGoldFile(testDir, args[0], "out"), s -> true);
-        Asserts.assertEQ(oa.getStdout(), goldOut, "Actual stdout isn't equal to golden one");
-
+        String anlzOut = formatOutput(Arrays.stream(splitOut.split(oa.getStdout())), s -> true);
+        Asserts.assertEQ(anlzOut, goldOut, "Actual stdout isn't equal to golden one");
         // TODO: add a comment why we skip such lines
         Predicate<String> notStartWhitespaces = s -> !(s.startsWith("\t") || s.startsWith(" "));
         String goldErr = formatOutput(streamGoldFile(testDir, args[0], "err"), notStartWhitespaces);
-        String anlzErr = formatOutput(Arrays.stream(oa.getStderr().split(Utils.NEW_LINE)),
+        String anlzErr = formatOutput(Arrays.stream(splitErr.split(oa.getStderr())),
                                       notStartWhitespaces);
         Asserts.assertEQ(anlzErr, goldErr, "Actual stderr isn't equal to golden one");
 
--- a/hotspot/test/testlibrary/jittester/src/jdk/test/lib/jittester/utils/FixedTrees.java	Fri May 27 05:49:24 2016 -0700
+++ b/hotspot/test/testlibrary/jittester/src/jdk/test/lib/jittester/utils/FixedTrees.java	Fri May 27 14:49:05 2016 +0000
@@ -37,7 +37,6 @@
 import jdk.test.lib.jittester.Operator;
 import jdk.test.lib.jittester.OperatorKind;
 import jdk.test.lib.jittester.PrintVariables;
-import jdk.test.lib.jittester.ProductionFailedException;
 import jdk.test.lib.jittester.Statement;
 import jdk.test.lib.jittester.StaticMemberVariable;
 import jdk.test.lib.jittester.Symbol;
@@ -171,22 +170,22 @@
         TryCatchBlock tryCatch1 = new TryCatchBlock(tryNode, nothing, catchBlocks1, 3);
         TypeKlass printStreamKlass = new TypeKlass("java.io.PrintStream");
         TypeKlass systemKlass = new TypeKlass("java.lang.System");
-        FunctionInfo systemOutPrintlnInfo = new FunctionInfo("println", printStreamKlass,
+        FunctionInfo systemOutPrintInfo = new FunctionInfo("print", printStreamKlass,
                 TypeList.VOID, 0, FunctionInfo.PUBLIC,
                 new VariableInfo("this", owner, printStreamKlass, VariableInfo.LOCAL | VariableInfo.INITIALIZED),
                 new VariableInfo("t", owner, TypeList.OBJECT,
                         VariableInfo.LOCAL  | VariableInfo.INITIALIZED));
-        List<IRNode> printlnArgs = new ArrayList<>();
+        List<IRNode> printArgs = new ArrayList<>();
         VariableInfo systemOutInfo = new VariableInfo("out", systemKlass, printStreamKlass,
                 VariableInfo.STATIC | VariableInfo.PUBLIC);
         StaticMemberVariable systemOutVar = new StaticMemberVariable(owner, systemOutInfo);
-        printlnArgs.add(systemOutVar);
-        printlnArgs.add(tVar);
-        Function println = new Function(printStreamKlass, systemOutPrintlnInfo, printlnArgs);
-        ArrayList<IRNode> printlnBlockContent = new ArrayList<>();
-        printlnBlockContent.add(new Statement(println, true));
-        Block printlnBlock = new Block(owner, TypeList.VOID, printlnBlockContent, 3);
-        TryCatchBlock tryCatch2 = new TryCatchBlock(printlnBlock, nothing, catchBlocks2, 3);
+        printArgs.add(systemOutVar);
+        printArgs.add(tVar);
+        Function print = new Function(printStreamKlass, systemOutPrintInfo, printArgs);
+        ArrayList<IRNode> printBlockContent = new ArrayList<>();
+        printBlockContent.add(new Statement(print, true));
+        Block printBlock = new Block(owner, TypeList.VOID, printBlockContent, 3);
+        TryCatchBlock tryCatch2 = new TryCatchBlock(printBlock, nothing, catchBlocks2, 3);
 
         List<IRNode> mainTryCatchBlockContent = new ArrayList<>();
         mainTryCatchBlockContent.add(new Statement(testInit, true));