8143355: Update for addition of vectorizedMismatch intrinsic for x86
Reviewed-by: kvn
Contributed-by: vivek.r.deshpande@intel.com, liqi.yi@intel.com
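Summary: adds the UseVectorizedMismatchIntrinsic flag, an x86-64 stub plus C2 plumbing for java.util.ArraysSupport.vectorizedMismatch, and rejects the flag on aarch64, ppc and sparc. As a rough orientation for reviewers, here is a hedged, pure-Java stand-in for the contract the stub implements; the class, the parameter names, the int offsets and the byte[] specialization are illustrative only (the real entry point takes Object references plus long offsets, per the signature added to vmSymbols.hpp).

    // Hedged sketch only: byte[]-specialized stand-in for the intrinsified method.
    final class VectorizedMismatchContract {
        // Returns the element index of the first mismatch in the scanned range,
        // or -1 when the ranges are identical (the stub's SAME_TILL_END result).
        static int mismatch(byte[] a, int aFrom, byte[] b, int bFrom,
                            int length, int log2ArrayIndexScale) {
            int bytes = length << log2ArrayIndexScale;   // element count scaled to bytes
            for (int i = 0; i < bytes; i++) {
                if (a[aFrom + i] != b[bFrom + i]) {
                    return i >> log2ArrayIndexScale;     // byte offset back to element index
                }
            }
            return -1;
        }
    }
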
--- a/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp Mon Dec 07 15:00:46 2015 +0000
+++ b/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp Mon Dec 07 16:35:07 2015 -0800
@@ -182,6 +182,11 @@
FLAG_SET_DEFAULT(UseAdler32Intrinsics, true);
}
+ if (UseVectorizedMismatchIntrinsic) {
+ warning("UseVectorizedMismatchIntrinsic specified, but not available on this CPU.");
+ FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
+ }
+
if (auxv & HWCAP_AES) {
UseAES = UseAES || FLAG_IS_DEFAULT(UseAES);
UseAESIntrinsics =
--- a/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp Mon Dec 07 15:00:46 2015 +0000
+++ b/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp Mon Dec 07 16:35:07 2015 -0800
@@ -223,6 +223,11 @@
UseMultiplyToLenIntrinsic = true;
}
+ if (UseVectorizedMismatchIntrinsic) {
+ warning("UseVectorizedMismatchIntrinsic specified, but not available on this CPU.");
+ FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
+ }
+
// Adjust RTM (Restricted Transactional Memory) flags.
if (!has_tcheck() && UseRTMLocking) {
// Can't continue because UseRTMLocking affects UseBiasedLocking flag
--- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp Mon Dec 07 15:00:46 2015 +0000
+++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp Mon Dec 07 16:35:07 2015 -0800
@@ -356,6 +356,11 @@
FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
}
+ if (UseVectorizedMismatchIntrinsic) {
+ warning("UseVectorizedMismatchIntrinsic specified, but not available on this CPU.");
+ FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
+ }
+
if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
(cache_line_size > ContendedPaddingWidth))
ContendedPaddingWidth = cache_line_size;
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Mon Dec 07 15:00:46 2015 +0000
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Mon Dec 07 16:35:07 2015 -0800
@@ -9439,13 +9439,184 @@
pop(tmp1);
}
+void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register length, Register log2_array_indxscale,
+ Register result, Register tmp1, Register tmp2, XMMRegister rymm0, XMMRegister rymm1, XMMRegister rymm2) {
+ assert(UseSSE42Intrinsics, "SSE4.2 must be enabled.");
+ Label VECTOR32_LOOP, VECTOR16_LOOP, VECTOR8_LOOP, VECTOR4_LOOP;
+ Label VECTOR16_TAIL, VECTOR8_TAIL, VECTOR4_TAIL;
+ Label VECTOR32_NOT_EQUAL, VECTOR16_NOT_EQUAL, VECTOR8_NOT_EQUAL, VECTOR4_NOT_EQUAL;
+ Label SAME_TILL_END, DONE;
+ Label BYTES_LOOP, BYTES_TAIL, BYTES_NOT_EQUAL;
+
+ //scale is in rcx in both Win64 and Unix
+ ShortBranchVerifier sbv(this);
+
+ shlq(length);
+ xorq(result, result);
+
+ cmpq(length, 8);
+ jcc(Assembler::equal, VECTOR8_LOOP);
+ jcc(Assembler::less, VECTOR4_TAIL);
+
+ if (UseAVX >= 2) {
+
+ cmpq(length, 16);
+ jcc(Assembler::equal, VECTOR16_LOOP);
+ jcc(Assembler::less, VECTOR8_LOOP);
+
+ cmpq(length, 32);
+ jccb(Assembler::less, VECTOR16_TAIL);
+
+ subq(length, 32);
+ bind(VECTOR32_LOOP);
+ vmovdqu(rymm0, Address(obja, result));
+ vmovdqu(rymm1, Address(objb, result));
+ vpxor(rymm2, rymm0, rymm1, Assembler::AVX_256bit);
+ vptest(rymm2, rymm2);
+ jcc(Assembler::notZero, VECTOR32_NOT_EQUAL);//mismatch found
+ addq(result, 32);
+ subq(length, 32);
+ jccb(Assembler::greaterEqual, VECTOR32_LOOP);
+ addq(length, 32);
+ jcc(Assembler::equal, SAME_TILL_END);
+ //falling through if less than 32 bytes left
+
+ bind(VECTOR16_TAIL);
+ cmpq(length, 16);
+ jccb(Assembler::less, VECTOR8_TAIL);
+ bind(VECTOR16_LOOP);
+ movdqu(rymm0, Address(obja, result));
+ movdqu(rymm1, Address(objb, result));
+ vpxor(rymm2, rymm0, rymm1, Assembler::AVX_128bit);
+ ptest(rymm2, rymm2);
+ jcc(Assembler::notZero, VECTOR16_NOT_EQUAL);//mismatch found
+ addq(result, 16);
+ subq(length, 16);
+ jcc(Assembler::equal, SAME_TILL_END);
+ //falling through if less than 16 bytes left
+ } else { //SSE4.2 path without AVX2
+
+ cmpq(length, 16);
+ jccb(Assembler::less, VECTOR8_TAIL);
+
+ subq(length, 16);
+ bind(VECTOR16_LOOP);
+ movdqu(rymm0, Address(obja, result));
+ movdqu(rymm1, Address(objb, result));
+ pxor(rymm0, rymm1);
+ ptest(rymm0, rymm0);
+ jcc(Assembler::notZero, VECTOR16_NOT_EQUAL);//mismatch found
+ addq(result, 16);
+ subq(length, 16);
+ jccb(Assembler::greaterEqual, VECTOR16_LOOP);
+ addq(length, 16);
+ jcc(Assembler::equal, SAME_TILL_END);
+ //falling through if less than 16 bytes left
+ }
+
+ bind(VECTOR8_TAIL);
+ cmpq(length, 8);
+ jccb(Assembler::less, VECTOR4_TAIL);
+ bind(VECTOR8_LOOP);
+ movq(tmp1, Address(obja, result));
+ movq(tmp2, Address(objb, result));
+ xorq(tmp1, tmp2);
+ testq(tmp1, tmp1);
+ jcc(Assembler::notZero, VECTOR8_NOT_EQUAL);//mismatch found
+ addq(result, 8);
+ subq(length, 8);
+ jcc(Assembler::equal, SAME_TILL_END);
+ //falling through if less than 8 bytes left
+
+ bind(VECTOR4_TAIL);
+ cmpq(length, 4);
+ jccb(Assembler::less, BYTES_TAIL);
+ bind(VECTOR4_LOOP);
+ movl(tmp1, Address(obja, result));
+ xorl(tmp1, Address(objb, result));
+ testl(tmp1, tmp1);
+ jcc(Assembler::notZero, VECTOR4_NOT_EQUAL);//mismatch found
+ addq(result, 4);
+ subq(length, 4);
+ jcc(Assembler::equal, SAME_TILL_END);
+ //falling through if less than 4 bytes left
+
+ bind(BYTES_TAIL);
+ bind(BYTES_LOOP);
+ load_unsigned_byte(tmp1, Address(obja, result));
+ load_unsigned_byte(tmp2, Address(objb, result));
+ xorl(tmp1, tmp2);
+ testl(tmp1, tmp1);
+ jccb(Assembler::notZero, BYTES_NOT_EQUAL);//mismatch found
+ decq(length);
+ jccb(Assembler::zero, SAME_TILL_END);
+ incq(result);
+ load_unsigned_byte(tmp1, Address(obja, result));
+ load_unsigned_byte(tmp2, Address(objb, result));
+ xorl(tmp1, tmp2);
+ testl(tmp1, tmp1);
+ jccb(Assembler::notZero, BYTES_NOT_EQUAL);//mismatch found
+ decq(length);
+ jccb(Assembler::zero, SAME_TILL_END);
+ incq(result);
+ load_unsigned_byte(tmp1, Address(obja, result));
+ load_unsigned_byte(tmp2, Address(objb, result));
+ xorl(tmp1, tmp2);
+ testl(tmp1, tmp1);
+ jccb(Assembler::notZero, BYTES_NOT_EQUAL);//mismatch found
+ jmpb(SAME_TILL_END);
+
+ if (UseAVX >= 2) {
+ bind(VECTOR32_NOT_EQUAL);
+ vpcmpeqb(rymm2, rymm2, rymm2, Assembler::AVX_256bit);
+ vpcmpeqb(rymm0, rymm0, rymm1, Assembler::AVX_256bit);
+ vpxor(rymm0, rymm0, rymm2, Assembler::AVX_256bit);
+ vpmovmskb(tmp1, rymm0);
+ bsfq(tmp1, tmp1);
+ addq(result, tmp1);
+ shrq(result);
+ jmpb(DONE);
+ }
+
+ bind(VECTOR16_NOT_EQUAL);
+ if (UseAVX >= 2) {
+ vpcmpeqb(rymm2, rymm2, rymm2, Assembler::AVX_128bit);
+ vpcmpeqb(rymm0, rymm0, rymm1, Assembler::AVX_128bit);
+ pxor(rymm0, rymm2);
+ } else {
+ pcmpeqb(rymm2, rymm2);
+ pxor(rymm0, rymm1);
+ pcmpeqb(rymm0, rymm1);
+ pxor(rymm0, rymm2);
+ }
+ pmovmskb(tmp1, rymm0);
+ bsfq(tmp1, tmp1);
+ addq(result, tmp1);
+ shrq(result);
+ jmpb(DONE);
+
+ bind(VECTOR8_NOT_EQUAL);
+ bind(VECTOR4_NOT_EQUAL);
+ bsfq(tmp1, tmp1);
+ shrq(tmp1, 3);
+ addq(result, tmp1);
+ bind(BYTES_NOT_EQUAL);
+ shrq(result);
+ jmpb(DONE);
+
+ bind(SAME_TILL_END);
+ mov64(result, -1);
+
+ bind(DONE);
+}
+
+
//Helper functions for square_to_len()
/**
* Store the squares of x[], right shifted one bit (divided by 2) into z[]
* Preserves x and z and modifies rest of the registers.
*/
-
void MacroAssembler::square_rshift(Register x, Register xlen, Register z, Register tmp1, Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg) {
// Perform square and right shift by 1
// Handle odd xlen case first, then for even xlen do the following
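The subtle part of vectorized_mismatch() above is recovering the mismatch position once a wide comparison fails: the AVX/SSE paths invert a vpcmpeqb result and use pmovmskb + bsf, while the scalar 8-byte path XORs the chunks, takes bsfq of the lowest set bit, shifts by 3 to get the byte within the chunk, and finally shifts the accumulated byte offset right by the scale. Below is a hedged Java rendering of that scalar sequence (xorq/bsfq/shrq at VECTOR8_NOT_EQUAL); the class and method names and the little-endian chunk assumption are illustrative only.

    // Hedged sketch of how the stub turns a differing 8-byte chunk into an element index.
    final class MismatchLocator {
        // chunkA/chunkB: 8 bytes loaded little-endian from the same offset of both operands,
        // byteOffset: distance of the chunk from the start of the scan.
        static int firstMismatchIndex(long chunkA, long chunkB,
                                      long byteOffset, int log2ArrayIndexScale) {
            long diff = chunkA ^ chunkB;                               // xorq(tmp1, tmp2)
            // caller only reaches here on a mismatch, so diff != 0
            int byteInChunk = Long.numberOfTrailingZeros(diff) >>> 3;  // bsfq + shrq(tmp1, 3)
            return (int) ((byteOffset + byteInChunk) >> log2ArrayIndexScale); // addq(result, tmp1) + shrq(result)
        }
    }
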
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp Mon Dec 07 15:00:46 2015 +0000
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp Mon Dec 07 16:35:07 2015 -0800
@@ -1346,7 +1346,6 @@
Register carry2);
void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, Register zlen,
Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5);
-
void square_rshift(Register x, Register len, Register z, Register tmp1, Register tmp3,
Register tmp4, Register tmp5, Register rdxReg, Register raxReg);
void multiply_add_64_bmi2(Register sum, Register op1, Register op2, Register carry,
@@ -1365,6 +1364,9 @@
void mul_add(Register out, Register in, Register offset, Register len, Register k, Register tmp1,
Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg,
Register raxReg);
+ void vectorized_mismatch(Register obja, Register objb, Register length, Register log2_array_indxscale,
+ Register result, Register tmp1, Register tmp2,
+ XMMRegister vec1, XMMRegister vec2, XMMRegister vec3);
#endif
// CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp Mon Dec 07 15:00:46 2015 +0000
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp Mon Dec 07 16:35:07 2015 -0800
@@ -4054,6 +4054,54 @@
return start;
}
+ /**
+ * Arguments:
+ *
+ * Input:
+ * c_rarg0 - obja address
+ * c_rarg1 - objb address
+ * c_rarg2 - length length
+ * c_rarg3 - scale log2_array_indxscale
+ */
+ address generate_vectorizedMismatch() {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "vectorizedMismatch");
+ address start = __ pc();
+
+ BLOCK_COMMENT("Entry:");
+ __ enter();
+
+#ifdef _WIN64 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
+ const Register scale = c_rarg0; //rcx, will exchange with r9
+ const Register objb = c_rarg1; //rdx
+ const Register length = c_rarg2; //r8
+ const Register obja = c_rarg3; //r9
+ __ xchgq(obja, scale); //now obja and scale contain the correct contents
+
+ const Register tmp1 = r10;
+ const Register tmp2 = r11;
+#endif
+#ifndef _WIN64 // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
+ const Register obja = c_rarg0; //U:rdi
+ const Register objb = c_rarg1; //U:rsi
+ const Register length = c_rarg2; //U:rdx
+ const Register scale = c_rarg3; //U:rcx
+ const Register tmp1 = r8;
+ const Register tmp2 = r9;
+#endif
+ const Register result = rax; //return value
+ const XMMRegister vec0 = xmm0;
+ const XMMRegister vec1 = xmm1;
+ const XMMRegister vec2 = xmm2;
+
+ __ vectorized_mismatch(obja, objb, length, scale, result, tmp1, tmp2, vec0, vec1, vec2);
+
+ __ leave();
+ __ ret(0);
+
+ return start;
+ }
+
/**
* Arguments:
*
@@ -4505,7 +4553,9 @@
if (UseMulAddIntrinsic) {
StubRoutines::_mulAdd = generate_mulAdd();
}
-
+ if (UseVectorizedMismatchIntrinsic) {
+ StubRoutines::_vectorizedMismatch = generate_vectorizedMismatch();
+ }
#ifndef _WINDOWS
if (UseMontgomeryMultiplyIntrinsic) {
StubRoutines::_montgomeryMultiply
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp Mon Dec 07 15:00:46 2015 +0000
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp Mon Dec 07 16:35:07 2015 -0800
@@ -1041,6 +1041,25 @@
}
}
+#ifdef _LP64
+ if (UseSSE42Intrinsics) {
+ if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
+ UseVectorizedMismatchIntrinsic = true;
+ }
+ } else if (UseVectorizedMismatchIntrinsic) {
+ if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
+ warning("vectorizedMismatch intrinsics are not available on this CPU");
+ FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
+ }
+#else
+ if (UseVectorizedMismatchIntrinsic) {
+ if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
+ warning("vectorizedMismatch intrinsic is not available in 32-bit VM");
+ }
+ FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
+ }
+#endif // _LP64
+
// Use count leading zeros count instruction if available.
if (supports_lzcnt()) {
if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
--- a/hotspot/src/share/vm/classfile/vmSymbols.cpp Mon Dec 07 15:00:46 2015 +0000
+++ b/hotspot/src/share/vm/classfile/vmSymbols.cpp Mon Dec 07 16:35:07 2015 -0800
@@ -681,6 +681,9 @@
case vmIntrinsics::_montgomerySquare:
if (!UseMontgomerySquareIntrinsic) return true;
break;
+ case vmIntrinsics::_vectorizedMismatch:
+ if (!UseVectorizedMismatchIntrinsic) return true;
+ break;
case vmIntrinsics::_addExactI:
case vmIntrinsics::_addExactL:
case vmIntrinsics::_decrementExactI:
--- a/hotspot/src/share/vm/classfile/vmSymbols.hpp Mon Dec 07 15:00:46 2015 +0000
+++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp Mon Dec 07 16:35:07 2015 -0800
@@ -957,6 +957,11 @@
do_name( montgomerySquare_name, "implMontgomerySquare") \
do_signature(montgomerySquare_signature, "([I[IIJ[I)[I") \
\
+ do_class(java_util_ArraysSupport, "java/util/ArraysSupport") \
+ do_intrinsic(_vectorizedMismatch, java_util_ArraysSupport, vectorizedMismatch_name, vectorizedMismatch_signature, F_S)\
+ do_name(vectorizedMismatch_name, "vectorizedMismatch") \
+ do_signature(vectorizedMismatch_signature, "(Ljava/lang/Object;JLjava/lang/Object;JII)I") \
+ \
/* java/lang/ref/Reference */ \
do_intrinsic(_Reference_get, java_lang_ref_Reference, get_name, void_object_signature, F_R) \
\
--- a/hotspot/src/share/vm/opto/c2compiler.cpp Mon Dec 07 15:00:46 2015 +0000
+++ b/hotspot/src/share/vm/opto/c2compiler.cpp Mon Dec 07 16:35:07 2015 -0800
@@ -441,6 +441,7 @@
case vmIntrinsics::_mulAdd:
case vmIntrinsics::_montgomeryMultiply:
case vmIntrinsics::_montgomerySquare:
+ case vmIntrinsics::_vectorizedMismatch:
case vmIntrinsics::_ghash_processBlocks:
case vmIntrinsics::_updateCRC32:
case vmIntrinsics::_updateBytesCRC32:
--- a/hotspot/src/share/vm/opto/escape.cpp Mon Dec 07 15:00:46 2015 +0000
+++ b/hotspot/src/share/vm/opto/escape.cpp Mon Dec 07 16:35:07 2015 -0800
@@ -987,7 +987,8 @@
strcmp(call->as_CallLeaf()->_name, "squareToLen") == 0 ||
strcmp(call->as_CallLeaf()->_name, "mulAdd") == 0 ||
strcmp(call->as_CallLeaf()->_name, "montgomery_multiply") == 0 ||
- strcmp(call->as_CallLeaf()->_name, "montgomery_square") == 0)
+ strcmp(call->as_CallLeaf()->_name, "montgomery_square") == 0 ||
+ strcmp(call->as_CallLeaf()->_name, "vectorizedMismatch") == 0)
))) {
call->dump();
fatal("EA unexpected CallLeaf %s", call->as_CallLeaf()->_name);
--- a/hotspot/src/share/vm/opto/library_call.cpp Mon Dec 07 15:00:46 2015 +0000
+++ b/hotspot/src/share/vm/opto/library_call.cpp Mon Dec 07 16:35:07 2015 -0800
@@ -312,6 +312,7 @@
bool inline_mulAdd();
bool inline_montgomeryMultiply();
bool inline_montgomerySquare();
+ bool inline_vectorizedMismatch();
bool inline_profileBoolean();
bool inline_isCompileConstant();
@@ -720,6 +721,9 @@
case vmIntrinsics::_montgomerySquare:
return inline_montgomerySquare();
+ case vmIntrinsics::_vectorizedMismatch:
+ return inline_vectorizedMismatch();
+
case vmIntrinsics::_ghash_processBlocks:
return inline_ghash_processBlocks();
@@ -5581,6 +5585,50 @@
return true;
}
+//-------------inline_vectorizedMismatch------------------------------
+bool LibraryCallKit::inline_vectorizedMismatch() {
+ assert(UseVectorizedMismatchIntrinsic, "not implemented on this platform");
+
+ address stubAddr = StubRoutines::vectorizedMismatch();
+ if (stubAddr == NULL) {
+ return false; // Intrinsic's stub is not implemented on this platform
+ }
+ const char* stubName = "vectorizedMismatch";
+ int size_l = callee()->signature()->size();
+ assert(size_l == 8, "vectorizedMismatch has 6 parameters");
+
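+ //each long offset occupies two argument slots, so the six parameters map to slots 0, 1, 3, 4, 6 and 7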
+ Node* obja = argument(0);
+ Node* aoffset = argument(1);
+ Node* objb = argument(3);
+ Node* boffset = argument(4);
+ Node* length = argument(6);
+ Node* scale = argument(7);
+
+ const Type* a_type = obja->Value(&_gvn);
+ const Type* b_type = objb->Value(&_gvn);
+ const TypeAryPtr* top_a = a_type->isa_aryptr();
+ const TypeAryPtr* top_b = b_type->isa_aryptr();
+ if (top_a == NULL || top_a->klass() == NULL ||
+ top_b == NULL || top_b->klass() == NULL) {
+ // failed array check
+ return false;
+ }
+
+ Node* call;
+ jvms()->set_should_reexecute(true);
+
+ Node* obja_adr = make_unsafe_address(obja, aoffset);
+ Node* objb_adr = make_unsafe_address(objb, boffset);
+
+ call = make_runtime_call(RC_LEAF,
+ OptoRuntime::vectorizedMismatch_Type(),
+ stubAddr, stubName, TypePtr::BOTTOM,
+ obja_adr, objb_adr, length, scale);
+
+ Node* result = _gvn.transform(new ProjNode(call, TypeFunc::Parms));
+ set_result(result);
+ return true;
+}
/**
* Calculate CRC32 for byte.
--- a/hotspot/src/share/vm/opto/runtime.cpp Mon Dec 07 15:00:46 2015 +0000
+++ b/hotspot/src/share/vm/opto/runtime.cpp Mon Dec 07 16:35:07 2015 -0800
@@ -1103,6 +1103,26 @@
return TypeFunc::make(domain, range);
}
+const TypeFunc* OptoRuntime::vectorizedMismatch_Type() {
+ // create input type (domain)
+ int num_args = 4;
+ int argcnt = num_args;
+ const Type** fields = TypeTuple::fields(argcnt);
+ int argp = TypeFunc::Parms;
+ fields[argp++] = TypePtr::NOTNULL; // obja
+ fields[argp++] = TypePtr::NOTNULL; // objb
+ fields[argp++] = TypeInt::INT; // length, number of elements
+ fields[argp++] = TypeInt::INT; // log2scale, element size
+ assert(argp == TypeFunc::Parms + argcnt, "correct decoding");
+ const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + argcnt, fields);
+
+ //return mismatch index (int)
+ fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms + 0] = TypeInt::INT;
+ const TypeTuple* range = TypeTuple::make(TypeFunc::Parms + 1, fields);
+ return TypeFunc::make(domain, range);
+}
+
// GHASH block processing
const TypeFunc* OptoRuntime::ghash_processBlocks_Type() {
int argcnt = 4;
--- a/hotspot/src/share/vm/opto/runtime.hpp Mon Dec 07 15:00:46 2015 +0000
+++ b/hotspot/src/share/vm/opto/runtime.hpp Mon Dec 07 16:35:07 2015 -0800
@@ -299,6 +299,8 @@
static const TypeFunc* mulAdd_Type();
+ static const TypeFunc* vectorizedMismatch_Type();
+
static const TypeFunc* ghash_processBlocks_Type();
static const TypeFunc* updateBytesCRC32_Type();
--- a/hotspot/src/share/vm/runtime/globals.hpp Mon Dec 07 15:00:46 2015 +0000
+++ b/hotspot/src/share/vm/runtime/globals.hpp Mon Dec 07 16:35:07 2015 -0800
@@ -855,6 +855,9 @@
product(bool, UseAdler32Intrinsics, false, \
"use intrinsics for java.util.zip.Adler32") \
\
+ product(bool, UseVectorizedMismatchIntrinsic, false, \
+ "Enables intrinsification of ArraysSupport.vectorizedMismatch()") \
+ \
diagnostic(ccstrlist, DisableIntrinsic, "", \
"do not expand intrinsics whose (internal) names appear here") \
\
--- a/hotspot/src/share/vm/runtime/stubRoutines.cpp Mon Dec 07 15:00:46 2015 +0000
+++ b/hotspot/src/share/vm/runtime/stubRoutines.cpp Mon Dec 07 16:35:07 2015 -0800
@@ -148,6 +148,8 @@
address StubRoutines::_montgomeryMultiply = NULL;
address StubRoutines::_montgomerySquare = NULL;
+address StubRoutines::_vectorizedMismatch = NULL;
+
address StubRoutines::_dexp = NULL;
address StubRoutines::_dlog = NULL;
--- a/hotspot/src/share/vm/runtime/stubRoutines.hpp Mon Dec 07 15:00:46 2015 +0000
+++ b/hotspot/src/share/vm/runtime/stubRoutines.hpp Mon Dec 07 16:35:07 2015 -0800
@@ -207,6 +207,8 @@
static address _montgomeryMultiply;
static address _montgomerySquare;
+ static address _vectorizedMismatch;
+
static address _dexp;
static address _dlog;
@@ -376,6 +378,8 @@
static address montgomeryMultiply() { return _montgomeryMultiply; }
static address montgomerySquare() { return _montgomerySquare; }
+ static address vectorizedMismatch() { return _vectorizedMismatch; }
+
static address dexp() { return _dexp; }
static address dlog() { return _dlog; }
--- a/hotspot/src/share/vm/runtime/vmStructs.cpp Mon Dec 07 15:00:46 2015 +0000
+++ b/hotspot/src/share/vm/runtime/vmStructs.cpp Mon Dec 07 16:35:07 2015 -0800
@@ -860,6 +860,7 @@
static_field(StubRoutines, _mulAdd, address) \
static_field(StubRoutines, _dexp, address) \
static_field(StubRoutines, _dlog, address) \
+ static_field(StubRoutines, _vectorizedMismatch, address) \
static_field(StubRoutines, _jbyte_arraycopy, address) \
static_field(StubRoutines, _jshort_arraycopy, address) \
static_field(StubRoutines, _jint_arraycopy, address) \