8151268: Wire up the x86 _vectorizedMismatch stub routine in C1
author psandoz
Thu, 05 May 2016 10:03:26 -0700
changeset 38238 1bbcc430c78d
parent 38237 d972e3a2df53
child 38239 4d8b8ba74fea
8151268: Wire up the x86 _vectorizedMismatch stub routine in C1 Reviewed-by: kvn
hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp
hotspot/src/cpu/ppc/vm/c1_LIRGenerator_ppc.cpp
hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp
hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp
hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp
hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp
hotspot/src/cpu/x86/vm/templateInterpreterGenerator_x86_64.cpp
hotspot/src/share/vm/c1/c1_Compiler.cpp
hotspot/src/share/vm/c1/c1_LIRGenerator.cpp
hotspot/src/share/vm/c1/c1_LIRGenerator.hpp
hotspot/src/share/vm/c1/c1_Runtime1.cpp
hotspot/src/share/vm/classfile/vmSymbols.cpp
--- a/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp	Wed May 04 15:30:21 2016 -0700
+++ b/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp	Thu May 05 10:03:26 2016 -0700
@@ -1033,6 +1033,10 @@
   Unimplemented();
 }
 
+void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) {  // aarch64 placeholder for the C1 _vectorizedMismatch intrinsic
+  fatal("vectorizedMismatch intrinsic is not implemented on this platform");  // no lowering here: reaching this aborts the VM
+}
+
 // _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
 // _i2b, _i2c, _i2s
 void LIRGenerator::do_Convert(Convert* x) {
--- a/hotspot/src/cpu/ppc/vm/c1_LIRGenerator_ppc.cpp	Wed May 04 15:30:21 2016 -0700
+++ b/hotspot/src/cpu/ppc/vm/c1_LIRGenerator_ppc.cpp	Thu May 05 10:03:26 2016 -0700
@@ -1426,6 +1426,10 @@
       ShouldNotReachHere();
     }
   }
+
+  void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) {
+    fatal("vectorizedMismatch intrinsic is not implemented on this platform");
+  }
 }
 
 void LIRGenerator::do_update_CRC32C(Intrinsic* x) {
--- a/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp	Wed May 04 15:30:21 2016 -0700
+++ b/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp	Thu May 05 10:03:26 2016 -0700
@@ -952,6 +952,10 @@
   }
 }
 
+void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) {  // sparc placeholder for the C1 _vectorizedMismatch intrinsic
+  fatal("vectorizedMismatch intrinsic is not implemented on this platform");  // no lowering here: reaching this aborts the VM
+}
+
 // _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
 // _i2b, _i2c, _i2s
 void LIRGenerator::do_Convert(Convert* x) {
--- a/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Wed May 04 15:30:21 2016 -0700
+++ b/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Thu May 05 10:03:26 2016 -0700
@@ -1112,6 +1112,83 @@
   Unimplemented();
 }
 
+void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) {
+  assert(UseVectorizedMismatchIntrinsic, "need AVX instruction support");
+  // x86 C1 lowering of the _vectorizedMismatch intrinsic: builds the two start addresses
+  // (object + offset) and calls the StubRoutines::vectorizedMismatch() stub as a leaf call.
+  LIR_Opr result = rlock_result(x);
+
+  LIRItem a(x->argument_at(0), this); // Object
+  LIRItem aOffset(x->argument_at(1), this); // long
+  LIRItem b(x->argument_at(2), this); // Object
+  LIRItem bOffset(x->argument_at(3), this); // long
+  LIRItem length(x->argument_at(4), this); // int
+  LIRItem log2ArrayIndexScale(x->argument_at(5), this); // int
+
+  a.load_item();
+  aOffset.load_nonconstant();
+  b.load_item();
+  bOffset.load_nonconstant();
+  // NOTE(review): 'long' may be narrower than jlong on some targets (e.g. LLP64) - confirm constant offsets cannot be truncated.
+  long constant_aOffset = 0;
+  LIR_Opr result_aOffset = aOffset.result();
+  if (result_aOffset->is_constant()) {
+    constant_aOffset = result_aOffset->as_jlong();  // constant offset becomes the displacement...
+    result_aOffset = LIR_OprFact::illegalOpr;       // ...so no index operand is used for a
+  }
+  LIR_Opr result_a = a.result();
+
+  long constant_bOffset = 0;
+  LIR_Opr result_bOffset = bOffset.result();
+  if (result_bOffset->is_constant()) {
+    constant_bOffset = result_bOffset->as_jlong();  // constant offset becomes the displacement...
+    result_bOffset = LIR_OprFact::illegalOpr;       // ...so no index operand is used for b
+  }
+  LIR_Opr result_b = b.result();
+  // NOTE(review): the 32-bit path below applies _l2i to a/b (marked "Object" above), not to the long offsets - confirm intent.
+#ifndef _LP64
+  result_a = new_register(T_INT);
+  __ convert(Bytecodes::_l2i, a.result(), result_a);
+  result_b = new_register(T_INT);
+  __ convert(Bytecodes::_l2i, b.result(), result_b);
+#endif
+
+  // Address operands: base + optional index register (scale 1) + constant displacement, typed T_BYTE.
+  LIR_Address* addr_a = new LIR_Address(result_a,
+                                        result_aOffset,
+                                        LIR_Address::times_1,
+                                        constant_aOffset,
+                                        T_BYTE);
+
+  LIR_Address* addr_b = new LIR_Address(result_b,
+                                        result_bOffset,
+                                        LIR_Address::times_1,
+                                        constant_bOffset,
+                                        T_BYTE);
+  // C calling convention used for the stub call: (address, address, int, int).
+  BasicTypeList signature(4);
+  signature.append(T_ADDRESS);
+  signature.append(T_ADDRESS);
+  signature.append(T_INT);
+  signature.append(T_INT);
+  CallingConvention* cc = frame_map()->c_calling_convention(&signature);
+  const LIR_Opr result_reg = result_register_for(x->type());
+  // Compute both effective addresses into fresh pointer registers with leal.
+  LIR_Opr ptr_addr_a = new_pointer_register();
+  __ leal(LIR_OprFact::address(addr_a), ptr_addr_a);
+
+  LIR_Opr ptr_addr_b = new_pointer_register();
+  __ leal(LIR_OprFact::address(addr_b), ptr_addr_b);
+  // Move/force the four arguments into their calling-convention locations.
+  __ move(ptr_addr_a, cc->at(0));
+  __ move(ptr_addr_b, cc->at(1));
+  length.load_item_force(cc->at(2));
+  log2ArrayIndexScale.load_item_force(cc->at(3));
+  // Leaf call into the stub, then copy the value from the fixed result register to the intrinsic's result operand.
+  __ call_runtime_leaf(StubRoutines::vectorizedMismatch(), getThreadTemp(), result_reg, cc->args());
+  __ move(result_reg, result);
+}
+
 // _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
 // _i2b, _i2c, _i2s
 LIR_Opr fixed_register_for(BasicType type) {
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp	Wed May 04 15:30:21 2016 -0700
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp	Thu May 05 10:03:26 2016 -0700
@@ -845,7 +845,7 @@
   void call(Label& L, relocInfo::relocType rtype);
   void call(Register entry);
 
-  // NOTE: this call tranfers to the effective address of entry NOT
+  // NOTE: this call transfers to the effective address of entry NOT
   // the address contained by entry. This is because this is more natural
   // for jumps/calls.
   void call(AddressLiteral entry);
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Wed May 04 15:30:21 2016 -0700
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Thu May 05 10:03:26 2016 -0700
@@ -4409,7 +4409,7 @@
   *   c_rarg0   - int crc
   *   c_rarg1   - byte* buf
   *   c_rarg2   - long length
-  *   c_rarg3   - table_start - optional (present only when doing a library_calll,
+  *   c_rarg3   - table_start - optional (present only when doing a library_call,
   *              not used by x86 algorithm)
   *
   * Ouput:
@@ -4532,6 +4532,9 @@
   *    c_rarg1   - objb     address
   *    c_rarg3   - length   length
   *    c_rarg4   - scale    log2_array_indxscale
+  *
+  *  Output:
+  *        rax   - int: >= 0 is the index of the first mismatch, < 0 is the bitwise complement of the number of elements left in the tail
   */
   address generate_vectorizedMismatch() {
     __ align(CodeEntryAlignment);
@@ -5291,9 +5294,6 @@
     if (UseMulAddIntrinsic) {
       StubRoutines::_mulAdd = generate_mulAdd();
     }
-    if (UseVectorizedMismatchIntrinsic) {
-      StubRoutines::_vectorizedMismatch = generate_vectorizedMismatch();
-    }
 #ifndef _WINDOWS
     if (UseMontgomeryMultiplyIntrinsic) {
       StubRoutines::_montgomeryMultiply
@@ -5305,6 +5305,10 @@
     }
 #endif // WINDOWS
 #endif // COMPILER2
+
+    if (UseVectorizedMismatchIntrinsic) {
+      StubRoutines::_vectorizedMismatch = generate_vectorizedMismatch();
+    }
   }
 
  public:
--- a/hotspot/src/cpu/x86/vm/templateInterpreterGenerator_x86_64.cpp	Wed May 04 15:30:21 2016 -0700
+++ b/hotspot/src/cpu/x86/vm/templateInterpreterGenerator_x86_64.cpp	Thu May 05 10:03:26 2016 -0700
@@ -289,9 +289,9 @@
 }
 
 /**
-* Method entry for static native methods:
+* Method entry for static (non-native) methods:
 *   int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end)
-*   int java.util.zip.CRC32C.updateByteBuffer(int crc, long address, int off, int end)
+*   int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long address, int off, int end)
 */
 address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
   if (UseCRC32CIntrinsics) {
@@ -306,7 +306,7 @@
     // Arguments are reversed on java expression stack
     // Calculate address of start element
     if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) {
-      __ movptr(buf, Address(rsp, 3 * wordSize)); // long buf
+      __ movptr(buf, Address(rsp, 3 * wordSize)); // long address
       __ movl2ptr(off, Address(rsp, 2 * wordSize)); // offset
       __ addq(buf, off); // + offset
       __ movl(crc, Address(rsp, 5 * wordSize)); // Initial CRC
--- a/hotspot/src/share/vm/c1/c1_Compiler.cpp	Wed May 04 15:30:21 2016 -0700
+++ b/hotspot/src/share/vm/c1/c1_Compiler.cpp	Thu May 05 10:03:26 2016 -0700
@@ -228,6 +228,7 @@
   case vmIntrinsics::_updateBytesCRC32C:
   case vmIntrinsics::_updateDirectByteBufferCRC32C:
 #endif
+  case vmIntrinsics::_vectorizedMismatch:
   case vmIntrinsics::_compareAndSwapInt:
   case vmIntrinsics::_compareAndSwapObject:
   case vmIntrinsics::_getCharStringU:
--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp	Wed May 04 15:30:21 2016 -0700
+++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp	Thu May 05 10:03:26 2016 -0700
@@ -3179,6 +3179,10 @@
     do_update_CRC32C(x);
     break;
 
+  case vmIntrinsics::_vectorizedMismatch:
+    do_vectorizedMismatch(x);
+    break;
+
   default: ShouldNotReachHere(); break;
   }
 }
--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp	Wed May 04 15:30:21 2016 -0700
+++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp	Thu May 05 10:03:26 2016 -0700
@@ -254,6 +254,7 @@
   void do_Reference_get(Intrinsic* x);
   void do_update_CRC32(Intrinsic* x);
   void do_update_CRC32C(Intrinsic* x);
+  void do_vectorizedMismatch(Intrinsic* x);
 
   LIR_Opr call_runtime(BasicTypeArray* signature, LIRItemList* args, address entry, ValueType* result_type, CodeEmitInfo* info);
   LIR_Opr call_runtime(BasicTypeArray* signature, LIR_OprList* args, address entry, ValueType* result_type, CodeEmitInfo* info);
--- a/hotspot/src/share/vm/c1/c1_Runtime1.cpp	Wed May 04 15:30:21 2016 -0700
+++ b/hotspot/src/share/vm/c1/c1_Runtime1.cpp	Thu May 05 10:03:26 2016 -0700
@@ -319,6 +319,7 @@
 #endif
   FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32());
   FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32C());
+  FUNCTION_CASE(entry, StubRoutines::vectorizedMismatch());
   FUNCTION_CASE(entry, StubRoutines::dexp());
   FUNCTION_CASE(entry, StubRoutines::dlog());
   FUNCTION_CASE(entry, StubRoutines::dlog10());
--- a/hotspot/src/share/vm/classfile/vmSymbols.cpp	Wed May 04 15:30:21 2016 -0700
+++ b/hotspot/src/share/vm/classfile/vmSymbols.cpp	Thu May 05 10:03:26 2016 -0700
@@ -353,6 +353,7 @@
   case vmIntrinsics::_updateCRC32:
   case vmIntrinsics::_updateBytesCRC32:
   case vmIntrinsics::_updateByteBufferCRC32:
+  case vmIntrinsics::_vectorizedMismatch:
     return true;
   default:
     return false;
@@ -384,6 +385,7 @@
   case vmIntrinsics::_updateCRC32:
   case vmIntrinsics::_updateBytesCRC32:
   case vmIntrinsics::_updateByteBufferCRC32:
+  case vmIntrinsics::_vectorizedMismatch:
     return false;
   default:
     return true;
@@ -669,6 +671,9 @@
   case vmIntrinsics::_updateDirectByteBufferCRC32C:
     if (!UseCRC32CIntrinsics) return true;
     break;
+  case vmIntrinsics::_vectorizedMismatch:
+    if (!UseVectorizedMismatchIntrinsic) return true;
+    break;
   case vmIntrinsics::_updateBytesAdler32:
   case vmIntrinsics::_updateByteBufferAdler32:
     if (!UseAdler32Intrinsics) return true;
@@ -734,9 +739,6 @@
   case vmIntrinsics::_montgomerySquare:
     if (!UseMontgomerySquareIntrinsic) return true;
     break;
-  case vmIntrinsics::_vectorizedMismatch:
-    if (!UseVectorizedMismatchIntrinsic) return true;
-    break;
   case vmIntrinsics::_addExactI:
   case vmIntrinsics::_addExactL:
   case vmIntrinsics::_decrementExactI: