diff -r 0f1bd2e6bc48 -r db627462f2c9 hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp --- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp Fri Oct 21 17:55:02 2016 +0200 +++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp Fri Oct 21 10:16:09 2016 -0700 @@ -3718,6 +3718,25 @@ return start; } + //Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. + address generate_pshuffle_byte_flip_mask_sha512() { + __ align(32); + StubCodeMark mark(this, "StubRoutines", "pshuffle_byte_flip_mask_sha512"); + address start = __ pc(); + if (VM_Version::supports_avx2()) { + __ emit_data64(0x0001020304050607, relocInfo::none); // PSHUFFLE_BYTE_FLIP_MASK + __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none); + __ emit_data64(0x1011121314151617, relocInfo::none); + __ emit_data64(0x18191a1b1c1d1e1f, relocInfo::none); + __ emit_data64(0x0000000000000000, relocInfo::none); //MASK_YMM_LO + __ emit_data64(0x0000000000000000, relocInfo::none); + __ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none); + __ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none); + } + + return start; + } + // ofs and limit are use for multi-block byte array. // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit) address generate_sha256_implCompress(bool multi_block, const char *name) { @@ -3761,6 +3780,39 @@ return start; } + address generate_sha512_implCompress(bool multi_block, const char *name) { + assert(VM_Version::supports_avx2(), ""); + assert(VM_Version::supports_bmi2(), ""); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + Register buf = c_rarg0; + Register state = c_rarg1; + Register ofs = c_rarg2; + Register limit = c_rarg3; + + const XMMRegister msg = xmm0; + const XMMRegister state0 = xmm1; + const XMMRegister state1 = xmm2; + const XMMRegister msgtmp0 = xmm3; + const XMMRegister msgtmp1 = xmm4; + const XMMRegister msgtmp2 = xmm5; + const XMMRegister msgtmp3 = xmm6; + const XMMRegister msgtmp4 = xmm7; + + const XMMRegister shuf_mask = xmm8; + + __ enter(); + + __ sha512_AVX2(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4, + buf, state, ofs, limit, rsp, multi_block, shuf_mask); + + __ leave(); + __ ret(0); + return start; + } + // This is a version of CTR/AES crypt which does 6 blocks in a loop at a time // to hide instruction latency // @@ -5081,6 +5133,12 @@ StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress"); StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB"); } + if (UseSHA512Intrinsics) { + StubRoutines::x86::_k512_W_addr = (address)StubRoutines::x86::_k512_W; + StubRoutines::x86::_pshuffle_byte_flip_mask_addr_sha512 = generate_pshuffle_byte_flip_mask_sha512(); + StubRoutines::_sha512_implCompress = generate_sha512_implCompress(false, "sha512_implCompress"); + StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true, "sha512_implCompressMB"); + } // Generate GHASH intrinsics code if (UseGHASHIntrinsics) {