# HG changeset patch # User lucy # Date 1501592890 -7200 # Node ID ff3f9838b730178b462a4f0a0c06f3ade7df5a6f # Parent 05423d4b10d202540fc60b2a1024d6d8200b7fea 8180823: [s390] Rework/optimize AES intrinsics Reviewed-by: mdoerr, goetz diff -r 05423d4b10d2 -r ff3f9838b730 hotspot/src/cpu/s390/vm/stubGenerator_s390.cpp --- a/hotspot/src/cpu/s390/vm/stubGenerator_s390.cpp Mon Jul 31 12:37:34 2017 -0700 +++ b/hotspot/src/cpu/s390/vm/stubGenerator_s390.cpp Tue Aug 01 15:08:10 2017 +0200 @@ -1683,7 +1683,7 @@ // src must designate an even/odd register pair, holding the address/length of the original message // Helper function which generates code to - // - load the function code in register fCode (== Z_R0) + // - load the function code in register fCode (== Z_R0). // - load the data block length (depends on cipher function) into register srclen if requested. // - is_decipher switches between cipher/decipher function codes // - set_len requests (if true) loading the data block length in register srclen @@ -1695,13 +1695,13 @@ bool identical_dataBlk_len = (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES192_dataBlk) && (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES256_dataBlk); // Expanded key length is 44/52/60 * 4 bytes for AES-128/AES-192/AES-256. - __ z_cghi(keylen, 52); - - __ z_lghi(fCode, VM_Version::Cipher::_AES256 + mode); + __ z_cghi(keylen, 52); // Check only once at the beginning. keylen and fCode may share the same register. 
+ + __ z_lghi(fCode, VM_Version::Cipher::_AES128 + mode); if (!identical_dataBlk_len) { - __ z_lghi(srclen, VM_Version::Cipher::_AES256_dataBlk); + __ z_lghi(srclen, VM_Version::Cipher::_AES128_dataBlk); } - __ z_brh(fCode_set); // keyLen > 52: AES256 + __ z_brl(fCode_set); // keyLen < 52: AES128 __ z_lghi(fCode, VM_Version::Cipher::_AES192 + mode); if (!identical_dataBlk_len) { @@ -1709,11 +1709,11 @@ } __ z_bre(fCode_set); // keyLen == 52: AES192 - __ z_lghi(fCode, VM_Version::Cipher::_AES128 + mode); + __ z_lghi(fCode, VM_Version::Cipher::_AES256 + mode); if (!identical_dataBlk_len) { - __ z_lghi(srclen, VM_Version::Cipher::_AES128_dataBlk); + __ z_lghi(srclen, VM_Version::Cipher::_AES256_dataBlk); } - // __ z_brl(fCode_set); // keyLen < 52: AES128 // fallthru + // __ z_brh(fCode_set); // keyLen > 52: AES256 // fallthru __ bind(fCode_set); if (identical_dataBlk_len) { @@ -1724,6 +1724,54 @@ } // Push a parameter block for the cipher/decipher instruction on the stack. + // Layout of the additional stack space allocated for AES_cipherBlockChaining: + // + // | | + // +--------+ <-- SP before expansion + // | | + // : : alignment loss, 0..(AES_parmBlk_align-8) bytes + // | | + // +--------+ + // | | + // : : space for parameter block, size VM_Version::Cipher::_AES*_parmBlk_C + // | | + // +--------+ <-- parmBlk, octoword-aligned, start of parameter block + // | | + // : : additional stack space for spills etc., size AES_parmBlk_addspace, DW @ Z_SP not usable!!! + // | | + // +--------+ <-- Z_SP after expansion + + void generate_push_Block(int dataBlk_len, int parmBlk_len, int crypto_fCode, + Register parmBlk, Register keylen, Register fCode, Register cv, Register key) { + const int AES_parmBlk_align = 32; // octoword alignment. + const int AES_parmBlk_addspace = 24; // Must be sufficiently large to hold all spilled registers + // (currently 2) PLUS 1 DW for the frame pointer. 
+ + const int cv_len = dataBlk_len; + const int key_len = parmBlk_len - cv_len; + // This len must be known at JIT compile time. Only then are we able to recalc the SP before resize. + // We buy this knowledge by wasting some (up to AES_parmBlk_align) bytes of stack space. + const int resize_len = cv_len + key_len + AES_parmBlk_align + AES_parmBlk_addspace; + + // Use parmBlk as temp reg here to hold the frame pointer. + __ resize_frame(-resize_len, parmBlk, true); + + // calculate parmBlk address from updated (resized) SP. + __ add2reg(parmBlk, resize_len - (cv_len + key_len), Z_SP); + __ z_nill(parmBlk, (~(AES_parmBlk_align-1)) & 0xffff); // Align parameter block. + + // There is room for stuff in the range [parmBlk-AES_parmBlk_addspace+8, parmBlk). + __ z_stg(keylen, -8, parmBlk); // Spill keylen for later use. + + // calculate (SP before resize) from updated SP. + __ add2reg(keylen, resize_len, Z_SP); // keylen holds prev SP for now. + __ z_stg(keylen, -16, parmBlk); // Spill prev SP for easy revert. + + __ z_mvc(0, cv_len-1, parmBlk, 0, cv); // Copy cv. + __ z_mvc(cv_len, key_len-1, parmBlk, 0, key); // Copy key. + __ z_lghi(fCode, crypto_fCode); + } + // NOTE: // Before returning, the stub has to copy the chaining value from // the parmBlk, where it was updated by the crypto instruction, back @@ -1732,17 +1780,14 @@ // the key length across the KMC instruction. We do so by spilling it to the stack, // just preceding the parmBlk (at (parmBlk - 8)). void generate_push_parmBlk(Register keylen, Register fCode, Register parmBlk, Register key, Register cv, bool is_decipher) { - const int AES_parmBlk_align = 32; - const int AES_parmBlk_addspace = AES_parmBlk_align; // Must be multiple of AES_parmblk_align. - int cv_len, key_len; int mode = is_decipher ? 
VM_Version::CipherMode::decipher : VM_Version::CipherMode::cipher; Label parmBlk_128, parmBlk_192, parmBlk_256, parmBlk_set; BLOCK_COMMENT("push parmBlk {"); if (VM_Version::has_Crypto_AES() ) { __ z_cghi(keylen, 52); } - if (VM_Version::has_Crypto_AES256()) { __ z_brh(parmBlk_256); } // keyLen > 52: AES256 + if (VM_Version::has_Crypto_AES128()) { __ z_brl(parmBlk_128); } // keyLen < 52: AES128 if (VM_Version::has_Crypto_AES192()) { __ z_bre(parmBlk_192); } // keyLen == 52: AES192 - if (VM_Version::has_Crypto_AES128()) { __ z_brl(parmBlk_128); } // keyLen < 52: AES128 + if (VM_Version::has_Crypto_AES256()) { __ z_brh(parmBlk_256); } // keyLen > 52: AES256 // Security net: requested AES function not available on this CPU. // NOTE: @@ -1751,71 +1796,35 @@ // at all, we have at least AES-128. __ stop_static("AES key strength not supported by CPU. Use -XX:-UseAES as remedy.", 0); - if (VM_Version::has_Crypto_AES128()) { - __ bind(parmBlk_128); - cv_len = VM_Version::Cipher::_AES128_dataBlk; - key_len = VM_Version::Cipher::_AES128_parmBlk_C - cv_len; - __ z_lay(parmBlk, -(VM_Version::Cipher::_AES128_parmBlk_C+AES_parmBlk_align)+(AES_parmBlk_align-1), Z_SP); - __ z_nill(parmBlk, (~(AES_parmBlk_align-1)) & 0xffff); // align parameter block - - // Resize the frame to accommodate for the aligned parameter block and other stuff. - // There is room for stuff in the range [parmBlk-AES_parmBlk_addspace, parmBlk). - __ z_stg(keylen, -8, parmBlk); // Spill keylen for later use. - __ z_stg(Z_SP, -16, parmBlk); // Spill SP for easy revert. - __ z_aghi(parmBlk, -AES_parmBlk_addspace); // Additional space for keylen, etc.. - __ resize_frame_absolute(parmBlk, keylen, true); // Resize frame with parmBlk being the new SP. - __ z_aghi(parmBlk, AES_parmBlk_addspace); // Restore parameter block address. - - __ z_mvc(0, cv_len-1, parmBlk, 0, cv); // Copy cv. - __ z_mvc(cv_len, key_len-1, parmBlk, 0, key); // Copy key. 
- __ z_lghi(fCode, VM_Version::Cipher::_AES128 + mode); - if (VM_Version::has_Crypto_AES192() || VM_Version::has_Crypto_AES256()) { + if (VM_Version::has_Crypto_AES256()) { + __ bind(parmBlk_256); + generate_push_Block(VM_Version::Cipher::_AES256_dataBlk, + VM_Version::Cipher::_AES256_parmBlk_C, + VM_Version::Cipher::_AES256 + mode, + parmBlk, keylen, fCode, cv, key); + if (VM_Version::has_Crypto_AES128() || VM_Version::has_Crypto_AES192()) { __ z_bru(parmBlk_set); // Fallthru otherwise. } } if (VM_Version::has_Crypto_AES192()) { __ bind(parmBlk_192); - cv_len = VM_Version::Cipher::_AES192_dataBlk; - key_len = VM_Version::Cipher::_AES192_parmBlk_C - cv_len; - __ z_lay(parmBlk, -(VM_Version::Cipher::_AES192_parmBlk_C+AES_parmBlk_align)+(AES_parmBlk_align-1), Z_SP); - __ z_nill(parmBlk, (~(AES_parmBlk_align-1)) & 0xffff); // Align parameter block. - - // Resize the frame to accommodate for the aligned parameter block and other stuff. - // There is room for stuff in the range [parmBlk-AES_parmBlk_addspace, parmBlk). - __ z_stg(keylen, -8, parmBlk); // Spill keylen for later use. - __ z_stg(Z_SP, -16, parmBlk); // Spill SP for easy revert. - __ z_aghi(parmBlk, -AES_parmBlk_addspace); // Additional space for keylen, etc.. - __ resize_frame_absolute(parmBlk, keylen, true); // Resize frame with parmBlk being the new SP. - __ z_aghi(parmBlk, AES_parmBlk_addspace); // Restore parameter block address. - - __ z_mvc(0, cv_len-1, parmBlk, 0, cv); // Copy cv. - __ z_mvc(cv_len, key_len-1, parmBlk, 0, key); // Copy key. - __ z_lghi(fCode, VM_Version::Cipher::_AES192 + mode); - if (VM_Version::has_Crypto_AES256()) { + generate_push_Block(VM_Version::Cipher::_AES192_dataBlk, + VM_Version::Cipher::_AES192_parmBlk_C, + VM_Version::Cipher::_AES192 + mode, + parmBlk, keylen, fCode, cv, key); + if (VM_Version::has_Crypto_AES128()) { __ z_bru(parmBlk_set); // Fallthru otherwise. 
} } - if (VM_Version::has_Crypto_AES256()) { - __ bind(parmBlk_256); - cv_len = VM_Version::Cipher::_AES256_dataBlk; - key_len = VM_Version::Cipher::_AES256_parmBlk_C - cv_len; - __ z_lay(parmBlk, -(VM_Version::Cipher::_AES256_parmBlk_C+AES_parmBlk_align)+(AES_parmBlk_align-1), Z_SP); - __ z_nill(parmBlk, (~(AES_parmBlk_align-1)) & 0xffff); // Align parameter block. - - // Resize the frame to accommodate for the aligned parameter block and other stuff. - // There is room for stuff in the range [parmBlk-AES_parmBlk_addspace, parmBlk). - __ z_stg(keylen, -8, parmBlk); // Spill keylen for later use. - __ z_stg(Z_SP, -16, parmBlk); // Spill SP for easy revert. - __ z_aghi(parmBlk, -AES_parmBlk_addspace); // Additional space for keylen, etc.. - __ resize_frame_absolute(parmBlk, keylen, true); // Resize frame with parmBlk being the new SP. - __ z_aghi(parmBlk, AES_parmBlk_addspace); // Restore parameter block address. - - __ z_mvc(0, cv_len-1, parmBlk, 0, cv); // Copy cv. - __ z_mvc(cv_len, key_len-1, parmBlk, 0, key); // Copy key. - __ z_lghi(fCode, VM_Version::Cipher::_AES256 + mode); - // __ z_bru(parmBlk_set); // fallthru + if (VM_Version::has_Crypto_AES128()) { + __ bind(parmBlk_128); + generate_push_Block(VM_Version::Cipher::_AES128_dataBlk, + VM_Version::Cipher::_AES128_parmBlk_C, + VM_Version::Cipher::_AES128 + mode, + parmBlk, keylen, fCode, cv, key); + // Fallthru } __ bind(parmBlk_set); @@ -1871,41 +1880,49 @@ } __ bind(parmBlk_set); } - __ z_lg(Z_SP, -16, parmBlk); // Revert resize_frame_absolute. + __ z_lg(Z_SP, -16, parmBlk); // Revert frame resize. Z_SP saved by push_parmBlk. BLOCK_COMMENT("} pop parmBlk"); } + // Compute AES encrypt/decrypt function. + void generate_AES_cipherBlock(bool is_decipher) { + // Incoming arguments. 
+ Register from = Z_ARG1; // source byte array + Register to = Z_ARG2; // destination byte array + Register key = Z_ARG3; // expanded key array + + const Register keylen = Z_R0; // Temporarily (until fCode is set) holds the expanded key array length. + + // Register definitions as required by KM instruction. + const Register fCode = Z_R0; // crypto function code + const Register parmBlk = Z_R1; // parameter block address (points to crypto key) + const Register src = Z_ARG1; // Must be even reg (KM requirement). + const Register srclen = Z_ARG2; // Must be odd reg and pair with src. Overwrites destination address. + const Register dst = Z_ARG3; // Must be even reg (KM requirement). Overwrites expanded key address. + + // Read key len of expanded key (in 4-byte words). + __ z_lgf(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); + + // Copy arguments to registers as required by crypto instruction. + __ z_lgr(parmBlk, key); // crypto key (in T_INT array). + __ lgr_if_needed(src, from); // Copy src address. Will not emit, src/from are identical. + __ z_lgr(dst, to); // Copy dst address, even register required. + + // Construct function code into fCode(Z_R0), data block length into srclen(Z_ARG2). + generate_load_AES_fCode(keylen, fCode, srclen, is_decipher); + + __ km(dst, src); // Cipher the message. + + __ z_br(Z_R14); + } + // Compute AES encrypt function. address generate_AES_encryptBlock(const char* name) { __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", name); - unsigned int start_off = __ offset(); // Remember stub start address (is rtn value). - - Register from = Z_ARG1; // source byte array - Register to = Z_ARG2; // destination byte array - Register key = Z_ARG3; // expanded key array - - const Register keylen = Z_R0; // Temporarily (until fCode is set) holds the expanded key array length. 
- const Register fCode = Z_R0; // crypto function code - const Register parmBlk = Z_R1; // parameter block address (points to crypto key) - const Register src = Z_ARG1; // is Z_R2 - const Register srclen = Z_ARG2; // Overwrites destination address. - const Register dst = Z_ARG3; // Overwrites expanded key address. - - // Read key len of expanded key (in 4-byte words). - __ z_lgf(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); - - // Copy arguments to registers as required by crypto instruction. - __ z_lgr(parmBlk, key); // crypto key (in T_INT array). - // __ z_lgr(src, from); // Copy not needed, src/from are identical. - __ z_lgr(dst, to); // Copy destination address to even register. - - // Construct function code in Z_R0, data block length in Z_ARG2. - generate_load_AES_fCode(keylen, fCode, srclen, false); - - __ km(dst, src); // Cipher the message. - - __ z_br(Z_R14); + unsigned int start_off = __ offset(); // Remember stub start address (is rtn value). + + generate_AES_cipherBlock(false); return __ addr_at(start_off); } @@ -1914,33 +1931,9 @@ address generate_AES_decryptBlock(const char* name) { __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", name); - unsigned int start_off = __ offset(); // Remember stub start address (is rtn value). - - Register from = Z_ARG1; // source byte array - Register to = Z_ARG2; // destination byte array - Register key = Z_ARG3; // expanded key array, not preset at entry!!! - - const Register keylen = Z_R0; // Temporarily (until fCode is set) holds the expanded key array length. - const Register fCode = Z_R0; // crypto function code - const Register parmBlk = Z_R1; // parameter block address (points to crypto key) - const Register src = Z_ARG1; // is Z_R2 - const Register srclen = Z_ARG2; // Overwrites destination address. - const Register dst = Z_ARG3; // Overwrites key address. - - // Read key len of expanded key (in 4-byte words). 
- __ z_lgf(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); - - // Copy arguments to registers as required by crypto instruction. - __ z_lgr(parmBlk, key); // Copy crypto key address. - // __ z_lgr(src, from); // Copy not needed, src/from are identical. - __ z_lgr(dst, to); // Copy destination address to even register. - - // Construct function code in Z_R0, data block length in Z_ARG2. - generate_load_AES_fCode(keylen, fCode, srclen, true); - - __ km(dst, src); // Cipher the message. - - __ z_br(Z_R14); + unsigned int start_off = __ offset(); // Remember stub start address (is rtn value). + + generate_AES_cipherBlock(true); return __ addr_at(start_off); } @@ -1958,10 +1951,7 @@ // We align the parameter block to the next available octoword. // // Compute chained AES encrypt function. - address generate_cipherBlockChaining_AES_encrypt(const char* name) { - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", name); - unsigned int start_off = __ offset(); // Remember stub start address (is rtn value). + void generate_AES_cipherBlockChaining(bool is_decipher) { Register from = Z_ARG1; // source byte array (clear text) Register to = Z_ARG2; // destination byte array (ciphered) @@ -1981,20 +1971,29 @@ __ z_lgf(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); // Construct parm block address in parmBlk (== Z_R1), copy cv and key to parm block. - // Construct function code in Z_R0. - generate_push_parmBlk(keylen, fCode, parmBlk, key, cv, false); + // Construct function code in fCode (Z_R0). + generate_push_parmBlk(keylen, fCode, parmBlk, key, cv, is_decipher); // Prepare other registers for instruction. - // __ z_lgr(src, from); // Not needed, registers are the same. + __ lgr_if_needed(src, from); // Copy src address. Will not emit, src/from are identical. 
__ z_lgr(dst, to); - __ z_llgfr(srclen, msglen); // We pass the offsets as ints, not as longs as required. - - __ kmc(dst, src); // Cipher the message. + __ z_llgfr(srclen, msglen); // We pass the offsets as ints, not as longs as required. + + __ kmc(dst, src); // Cipher the message. generate_pop_parmBlk(keylen, parmBlk, key, cv); - __ z_llgfr(Z_RET, msglen); // We pass the offsets as ints, not as longs as required. + __ z_llgfr(Z_RET, msglen); // We pass the offsets as ints, not as longs as required. __ z_br(Z_R14); + } + + // Compute chained AES encrypt function. + address generate_cipherBlockChaining_AES_encrypt(const char* name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + unsigned int start_off = __ offset(); // Remember stub start address (is rtn value). + + generate_AES_cipherBlockChaining(false); return __ addr_at(start_off); } @@ -2005,38 +2004,7 @@ StubCodeMark mark(this, "StubRoutines", name); unsigned int start_off = __ offset(); // Remember stub start address (is rtn value). - Register from = Z_ARG1; // source byte array (ciphered) - Register to = Z_ARG2; // destination byte array (clear text) - Register key = Z_ARG3; // expanded key array, not preset at entry!!! - Register cv = Z_ARG4; // chaining value - const Register msglen = Z_ARG5; // Total length of the msg to be encrypted. Value must be returned - // in Z_RET upon completion of this stub. - - const Register keylen = Z_R0; // Expanded key length, as read from key array. Temp only. - const Register fCode = Z_R0; // crypto function code - const Register parmBlk = Z_R1; // parameter block address (points to crypto key) - const Register src = Z_ARG1; // is Z_R2 - const Register srclen = Z_ARG2; // Overwrites destination address. - const Register dst = Z_ARG3; // Overwrites key address. - - // Read key len of expanded key (in 4-byte words). 
- __ z_lgf(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); - - // Construct parm block address in parmBlk (== Z_R1), copy cv and key to parm block. - // Construct function code in Z_R0. - generate_push_parmBlk(keylen, fCode, parmBlk, key, cv, true); - - // Prepare other registers for instruction. - // __ z_lgr(src, from); // Not needed, registers are the same. - __ z_lgr(dst, to); - __ z_llgfr(srclen, msglen); // We pass the offsets as ints, not as longs as required. - - __ kmc(dst, src); // Decipher the message. - - generate_pop_parmBlk(keylen, parmBlk, key, cv); - - __ z_llgfr(Z_RET, msglen); // We pass the offsets as ints, not as longs as required. - __ z_br(Z_R14); + generate_AES_cipherBlockChaining(true); return __ addr_at(start_off); } diff -r 05423d4b10d2 -r ff3f9838b730 hotspot/src/cpu/s390/vm/vm_version_s390.cpp --- a/hotspot/src/cpu/s390/vm/vm_version_s390.cpp Mon Jul 31 12:37:34 2017 -0700 +++ b/hotspot/src/cpu/s390/vm/vm_version_s390.cpp Tue Aug 01 15:08:10 2017 +0200 @@ -1,6 +1,6 @@ /* * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2016 SAP SE. All rights reserved. + * Copyright (c) 2016, 2017 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -153,6 +153,10 @@ warning("AES intrinsics are not available on this CPU"); FLAG_SET_DEFAULT(UseAESIntrinsics, false); } + if (UseAESIntrinsics && !UseAES) { + warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled."); + FLAG_SET_DEFAULT(UseAESIntrinsics, false); + } // TODO: implement AES/CTR intrinsics if (UseAESCTRIntrinsics) {