src/hotspot/cpu/x86/macroAssembler_x86_aes.cpp
author kvn
Thu, 07 Nov 2019 17:47:22 -0800
changeset 58977 c6a789f495fe
parent 57786 948ac3112da8
permissions -rw-r--r--
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions Reviewed-by: kvn Contributed-by: smita.kamath@intel.com, regev.shemy@intel.com, shay.gueron@intel.com
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
52990
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
     1
/*
58977
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
     2
* Copyright (c) 2019, Intel Corporation.
52990
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
     3
*
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
     4
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
     5
*
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
     6
* This code is free software; you can redistribute it and/or modify it
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
     7
* under the terms of the GNU General Public License version 2 only, as
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
     8
* published by the Free Software Foundation.
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
     9
*
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    10
* This code is distributed in the hope that it will be useful, but WITHOUT
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    11
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    12
* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    13
* version 2 for more details (a copy is included in the LICENSE file that
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    14
* accompanied this code).
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    15
*
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    16
* You should have received a copy of the GNU General Public License version
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    17
* 2 along with this work; if not, write to the Free Software Foundation,
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    18
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    19
*
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    20
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    21
* or visit www.oracle.com if you need additional information or have any
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    22
* questions.
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    23
*
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    24
*/
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    25
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    26
#include "precompiled.hpp"
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    27
#include "asm/assembler.hpp"
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    28
#include "asm/assembler.inline.hpp"
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    29
#include "runtime/stubRoutines.hpp"
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    30
#include "macroAssembler_x86.hpp"
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    31
53017
e10a1f7aaa13 8215354: x86_32 build failures after JDK-8214074 (Ghash optimization using AVX instructions)
shade
parents: 52990
diff changeset
    32
#ifdef _LP64
57786
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    33
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    34
// Emits a 512-bit vaesenc for each of the registers xmm0 .. xmm<rnum>
// (inclusive), in ascending register order. Each register is both the
// destination and the first source, so it is updated in place; 'key' is
// passed as the second source operand of every instruction.
void MacroAssembler::roundEnc(XMMRegister key, int rnum) {
  int reg_idx = 0;
  while (reg_idx <= rnum) {
    const XMMRegister state = as_XMMRegister(reg_idx);
    vaesenc(state, state, key, Assembler::AVX_512bit);
    ++reg_idx;
  }
}
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    39
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    40
// Emits a 512-bit vaesenclast for each of the registers xmm0 .. xmm<rnum>
// (inclusive), in ascending register order. Each register is both the
// destination and the first source, so it is updated in place; 'key' is
// passed as the second source operand of every instruction.
void MacroAssembler::lastroundEnc(XMMRegister key, int rnum) {
  int reg_idx = 0;
  while (reg_idx <= rnum) {
    const XMMRegister state = as_XMMRegister(reg_idx);
    vaesenclast(state, state, key, Assembler::AVX_512bit);
    ++reg_idx;
  }
}
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    45
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    46
// Emits a 512-bit vaesdec for each of the registers xmm0 .. xmm<rnum>
// (inclusive), in ascending register order. Each register is both the
// destination and the first source, so it is updated in place; 'key' is
// passed as the second source operand of every instruction.
void MacroAssembler::roundDec(XMMRegister key, int rnum) {
  int reg_idx = 0;
  while (reg_idx <= rnum) {
    const XMMRegister state = as_XMMRegister(reg_idx);
    vaesdec(state, state, key, Assembler::AVX_512bit);
    ++reg_idx;
  }
}
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    51
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    52
// Emits a 512-bit vaesdeclast for each of the registers xmm0 .. xmm<rnum>
// (inclusive), in ascending register order. Each register is both the
// destination and the first source, so it is updated in place; 'key' is
// passed as the second source operand of every instruction.
void MacroAssembler::lastroundDec(XMMRegister key, int rnum) {
  int reg_idx = 0;
  while (reg_idx <= rnum) {
    const XMMRegister state = as_XMMRegister(reg_idx);
    vaesdeclast(state, state, key, Assembler::AVX_512bit);
    ++reg_idx;
  }
}
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    57
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    58
// Load key and shuffle operation
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    59
void MacroAssembler::ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    60
    movdqu(xmmdst, Address(key, offset));
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    61
    if (xmm_shuf_mask != NULL) {
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    62
        pshufb(xmmdst, xmm_shuf_mask);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    63
    } else {
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    64
       pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    65
    }
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    66
   evshufi64x2(xmmdst, xmmdst, xmmdst, 0x0, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    67
}
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    68
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    69
// AES-ECB Encrypt Operation
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    70
void MacroAssembler::aesecb_encrypt(Register src_addr, Register dest_addr, Register key, Register len) {
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    71
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    72
    const Register pos = rax;
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    73
    const Register rounds = r12;
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    74
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    75
    Label NO_PARTS, LOOP, Loop_start, LOOP2, AES192, END_LOOP, AES256, REMAINDER, LAST2, END, KEY_192, KEY_256, EXIT;
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    76
    push(r13);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    77
    push(r12);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    78
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    79
    // For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    80
    // context for the registers used, where all instructions below are using 128-bit mode
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    81
    // On EVEX without VL and BW, these instructions will all be AVX.
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    82
    if (VM_Version::supports_avx512vlbw()) {
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    83
       movl(rax, 0xffff);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    84
       kmovql(k1, rax);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    85
    }
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    86
    push(len); // Save
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    87
    push(rbx);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    88
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    89
    vzeroupper();
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    90
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    91
    xorptr(pos, pos);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    92
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    93
    // Calculate number of rounds based on key length(128, 192, 256):44 for 10-rounds, 52 for 12-rounds, 60 for 14-rounds
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    94
    movl(rounds, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    95
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    96
    // Load Key shuf mask
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    97
    const XMMRegister xmm_key_shuf_mask = xmm31;  // used temporarily to swap key bytes up front
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    98
    movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
    99
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   100
    // Load and shuffle key based on number of rounds
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   101
    ev_load_key(xmm8, key, 0 * 16, xmm_key_shuf_mask);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   102
    ev_load_key(xmm9, key, 1 * 16, xmm_key_shuf_mask);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   103
    ev_load_key(xmm10, key, 2 * 16, xmm_key_shuf_mask);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   104
    ev_load_key(xmm23, key, 3 * 16, xmm_key_shuf_mask);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   105
    ev_load_key(xmm12, key, 4 * 16, xmm_key_shuf_mask);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   106
    ev_load_key(xmm13, key, 5 * 16, xmm_key_shuf_mask);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   107
    ev_load_key(xmm14, key, 6 * 16, xmm_key_shuf_mask);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   108
    ev_load_key(xmm15, key, 7 * 16, xmm_key_shuf_mask);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   109
    ev_load_key(xmm16, key, 8 * 16, xmm_key_shuf_mask);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   110
    ev_load_key(xmm17, key, 9 * 16, xmm_key_shuf_mask);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   111
    ev_load_key(xmm24, key, 10 * 16, xmm_key_shuf_mask);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   112
    cmpl(rounds, 52);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   113
    jcc(Assembler::greaterEqual, KEY_192);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   114
    jmp(Loop_start);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   115
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   116
    bind(KEY_192);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   117
    ev_load_key(xmm19, key, 11 * 16, xmm_key_shuf_mask);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   118
    ev_load_key(xmm20, key, 12 * 16, xmm_key_shuf_mask);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   119
    cmpl(rounds, 60);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   120
    jcc(Assembler::equal, KEY_256);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   121
    jmp(Loop_start);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   122
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   123
    bind(KEY_256);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   124
    ev_load_key(xmm21, key, 13 * 16, xmm_key_shuf_mask);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   125
    ev_load_key(xmm22, key, 14 * 16, xmm_key_shuf_mask);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   126
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   127
    bind(Loop_start);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   128
    movq(rbx, len);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   129
    // Divide length by 16 to convert it to number of blocks
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   130
    shrq(len, 4);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   131
    shlq(rbx, 60);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   132
    jcc(Assembler::equal, NO_PARTS);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   133
    addq(len, 1);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   134
    // Check if number of blocks is greater than or equal to 32
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   135
    // If true, 512 bytes are processed at a time (code marked by label LOOP)
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   136
    // If not, 16 bytes are processed (code marked by REMAINDER label)
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   137
    bind(NO_PARTS);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   138
    movq(rbx, len);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   139
    shrq(len, 5);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   140
    jcc(Assembler::equal, REMAINDER);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   141
    movl(r13, len);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   142
    // Compute number of blocks that will be processed 512 bytes at a time
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   143
    // Subtract this from the total number of blocks which will then be processed by REMAINDER loop
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   144
    shlq(r13, 5);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   145
    subq(rbx, r13);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   146
    //Begin processing 512 bytes
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   147
    bind(LOOP);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   148
    // Move 64 bytes of PT data into a zmm register, as a result 512 bytes of PT loaded in zmm0-7
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   149
    evmovdquq(xmm0, Address(src_addr, pos, Address::times_1, 0 * 64), Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   150
    evmovdquq(xmm1, Address(src_addr, pos, Address::times_1, 1 * 64), Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   151
    evmovdquq(xmm2, Address(src_addr, pos, Address::times_1, 2 * 64), Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   152
    evmovdquq(xmm3, Address(src_addr, pos, Address::times_1, 3 * 64), Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   153
    evmovdquq(xmm4, Address(src_addr, pos, Address::times_1, 4 * 64), Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   154
    evmovdquq(xmm5, Address(src_addr, pos, Address::times_1, 5 * 64), Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   155
    evmovdquq(xmm6, Address(src_addr, pos, Address::times_1, 6 * 64), Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   156
    evmovdquq(xmm7, Address(src_addr, pos, Address::times_1, 7 * 64), Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   157
    // Xor with the first round key
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   158
    evpxorq(xmm0, xmm0, xmm8, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   159
    evpxorq(xmm1, xmm1, xmm8, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   160
    evpxorq(xmm2, xmm2, xmm8, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   161
    evpxorq(xmm3, xmm3, xmm8, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   162
    evpxorq(xmm4, xmm4, xmm8, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   163
    evpxorq(xmm5, xmm5, xmm8, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   164
    evpxorq(xmm6, xmm6, xmm8, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   165
    evpxorq(xmm7, xmm7, xmm8, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   166
    // 9 Aes encode round operations
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   167
    roundEnc(xmm9,  7);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   168
    roundEnc(xmm10, 7);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   169
    roundEnc(xmm23, 7);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   170
    roundEnc(xmm12, 7);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   171
    roundEnc(xmm13, 7);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   172
    roundEnc(xmm14, 7);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   173
    roundEnc(xmm15, 7);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   174
    roundEnc(xmm16, 7);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   175
    roundEnc(xmm17, 7);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   176
    cmpl(rounds, 52);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   177
    jcc(Assembler::aboveEqual, AES192);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   178
    // Aesenclast round operation for keysize = 128
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   179
    lastroundEnc(xmm24, 7);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   180
    jmp(END_LOOP);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   181
    // Additional 2 rounds of Aesenc operation for keysize = 192
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   182
    bind(AES192);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   183
    roundEnc(xmm24, 7);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   184
    roundEnc(xmm19, 7);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   185
    cmpl(rounds, 60);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   186
    jcc(Assembler::aboveEqual, AES256);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   187
    // Aesenclast round for keysize = 192
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   188
    lastroundEnc(xmm20, 7);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   189
    jmp(END_LOOP);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   190
    // 2 rounds of Aesenc operation and Aesenclast for keysize = 256
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   191
    bind(AES256);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   192
    roundEnc(xmm20, 7);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   193
    roundEnc(xmm21, 7);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   194
    lastroundEnc(xmm22, 7);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   195
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   196
    bind(END_LOOP);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   197
    // Move 512 bytes of CT to destination
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   198
    evmovdquq(Address(dest_addr, pos, Address::times_1, 0 * 64), xmm0, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   199
    evmovdquq(Address(dest_addr, pos, Address::times_1, 1 * 64), xmm1, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   200
    evmovdquq(Address(dest_addr, pos, Address::times_1, 2 * 64), xmm2, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   201
    evmovdquq(Address(dest_addr, pos, Address::times_1, 3 * 64), xmm3, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   202
    evmovdquq(Address(dest_addr, pos, Address::times_1, 4 * 64), xmm4, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   203
    evmovdquq(Address(dest_addr, pos, Address::times_1, 5 * 64), xmm5, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   204
    evmovdquq(Address(dest_addr, pos, Address::times_1, 6 * 64), xmm6, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   205
    evmovdquq(Address(dest_addr, pos, Address::times_1, 7 * 64), xmm7, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   206
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   207
    addq(pos, 512);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   208
    decq(len);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   209
    jcc(Assembler::notEqual, LOOP);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   210
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   211
    bind(REMAINDER);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   212
    vzeroupper();
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   213
    cmpq(rbx, 0);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   214
    jcc(Assembler::equal, END);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   215
    // Process 16 bytes at a time
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   216
    bind(LOOP2);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   217
    movdqu(xmm1, Address(src_addr, pos, Address::times_1, 0));
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   218
    vpxor(xmm1, xmm1, xmm8, Assembler::AVX_128bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   219
    // xmm2 contains shuffled key for Aesenclast operation.
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   220
    vmovdqu(xmm2, xmm24);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   221
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   222
    vaesenc(xmm1, xmm1, xmm9, Assembler::AVX_128bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   223
    vaesenc(xmm1, xmm1, xmm10, Assembler::AVX_128bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   224
    vaesenc(xmm1, xmm1, xmm23, Assembler::AVX_128bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   225
    vaesenc(xmm1, xmm1, xmm12, Assembler::AVX_128bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   226
    vaesenc(xmm1, xmm1, xmm13, Assembler::AVX_128bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   227
    vaesenc(xmm1, xmm1, xmm14, Assembler::AVX_128bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   228
    vaesenc(xmm1, xmm1, xmm15, Assembler::AVX_128bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   229
    vaesenc(xmm1, xmm1, xmm16, Assembler::AVX_128bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   230
    vaesenc(xmm1, xmm1, xmm17, Assembler::AVX_128bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   231
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   232
    cmpl(rounds, 52);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   233
    jcc(Assembler::below, LAST2);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   234
    vmovdqu(xmm2, xmm20);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   235
    vaesenc(xmm1, xmm1, xmm24, Assembler::AVX_128bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   236
    vaesenc(xmm1, xmm1, xmm19, Assembler::AVX_128bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   237
    cmpl(rounds, 60);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   238
    jcc(Assembler::below, LAST2);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   239
    vmovdqu(xmm2, xmm22);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   240
    vaesenc(xmm1, xmm1, xmm20, Assembler::AVX_128bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   241
    vaesenc(xmm1, xmm1, xmm21, Assembler::AVX_128bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   242
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   243
    bind(LAST2);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   244
    // Aesenclast round
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   245
    vaesenclast(xmm1, xmm1, xmm2, Assembler::AVX_128bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   246
    // Write 16 bytes of CT to destination
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   247
    movdqu(Address(dest_addr, pos, Address::times_1, 0), xmm1);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   248
    addq(pos, 16);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   249
    decq(rbx);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   250
    jcc(Assembler::notEqual, LOOP2);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   251
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   252
    bind(END);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   253
    // Zero out the round keys
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   254
    evpxorq(xmm8, xmm8, xmm8, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   255
    evpxorq(xmm9, xmm9, xmm9, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   256
    evpxorq(xmm10, xmm10, xmm10, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   257
    evpxorq(xmm23, xmm23, xmm23, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   258
    evpxorq(xmm12, xmm12, xmm12, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   259
    evpxorq(xmm13, xmm13, xmm13, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   260
    evpxorq(xmm14, xmm14, xmm14, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   261
    evpxorq(xmm15, xmm15, xmm15, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   262
    evpxorq(xmm16, xmm16, xmm16, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   263
    evpxorq(xmm17, xmm17, xmm17, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   264
    evpxorq(xmm24, xmm24, xmm24, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   265
    cmpl(rounds, 44);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   266
    jcc(Assembler::belowEqual, EXIT);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   267
    evpxorq(xmm19, xmm19, xmm19, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   268
    evpxorq(xmm20, xmm20, xmm20, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   269
    cmpl(rounds, 52);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   270
    jcc(Assembler::belowEqual, EXIT);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   271
    evpxorq(xmm21, xmm21, xmm21, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   272
    evpxorq(xmm22, xmm22, xmm22, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   273
    bind(EXIT);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   274
    pop(rbx);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   275
    pop(rax); // return length
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   276
    pop(r12);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   277
    pop(r13);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   278
}
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   279
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   280
// AES-ECB Decrypt Operation
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   281
void MacroAssembler::aesecb_decrypt(Register src_addr, Register dest_addr, Register key, Register len)  {
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   282
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   283
    Label NO_PARTS, LOOP, Loop_start, LOOP2, AES192, END_LOOP, AES256, REMAINDER, LAST2, END, KEY_192, KEY_256, EXIT;
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   284
    const Register pos = rax;
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   285
    const Register rounds = r12;
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   286
    push(r13);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   287
    push(r12);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   288
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   289
    // For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   290
    // context for the registers used, where all instructions below are using 128-bit mode
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   291
    // On EVEX without VL and BW, these instructions will all be AVX.
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   292
    if (VM_Version::supports_avx512vlbw()) {
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   293
       movl(rax, 0xffff);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   294
       kmovql(k1, rax);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   295
    }
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   296
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   297
    push(len); // Save
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   298
    push(rbx);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   299
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   300
    vzeroupper();
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   301
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   302
    xorptr(pos, pos);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   303
    // Load the expanded key length in ints, which identifies the key size (128, 192, 256): 44 for 10 rounds, 52 for 12 rounds, 60 for 14 rounds
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   304
    movl(rounds, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   305
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   306
    // Load Key shuf mask
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   307
    const XMMRegister xmm_key_shuf_mask = xmm31;  // used temporarily to swap key bytes up front
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   308
    movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   309
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   310
    // Load and shuffle round keys. The java expanded key ordering is rotated one position in decryption.
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   311
    // So the first round key is loaded from 1*16 here and last round key is loaded from 0*16
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   312
    ev_load_key(xmm9,  key, 1 * 16, xmm_key_shuf_mask);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   313
    ev_load_key(xmm10, key, 2 * 16, xmm_key_shuf_mask);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   314
    ev_load_key(xmm11, key, 3 * 16, xmm_key_shuf_mask);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   315
    ev_load_key(xmm12, key, 4 * 16, xmm_key_shuf_mask);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   316
    ev_load_key(xmm13, key, 5 * 16, xmm_key_shuf_mask);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   317
    ev_load_key(xmm14, key, 6 * 16, xmm_key_shuf_mask);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   318
    ev_load_key(xmm15, key, 7 * 16, xmm_key_shuf_mask);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   319
    ev_load_key(xmm16, key, 8 * 16, xmm_key_shuf_mask);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   320
    ev_load_key(xmm17, key, 9 * 16, xmm_key_shuf_mask);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   321
    ev_load_key(xmm18, key, 10 * 16, xmm_key_shuf_mask);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   322
    ev_load_key(xmm27, key, 0 * 16, xmm_key_shuf_mask);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   323
    cmpl(rounds, 52);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   324
    jcc(Assembler::greaterEqual, KEY_192);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   325
    jmp(Loop_start);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   326
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   327
    bind(KEY_192);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   328
    ev_load_key(xmm19, key, 11 * 16, xmm_key_shuf_mask);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   329
    ev_load_key(xmm20, key, 12 * 16, xmm_key_shuf_mask);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   330
    cmpl(rounds, 60);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   331
    jcc(Assembler::equal, KEY_256);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   332
    jmp(Loop_start);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   333
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   334
    bind(KEY_256);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   335
    ev_load_key(xmm21, key, 13 * 16, xmm_key_shuf_mask);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   336
    ev_load_key(xmm22, key, 14 * 16, xmm_key_shuf_mask);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   337
    bind(Loop_start);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   338
    movq(rbx, len);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   339
    // Convert input length to number of blocks
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   340
    shrq(len, 4);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   341
    shlq(rbx, 60);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   342
    jcc(Assembler::equal, NO_PARTS);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   343
    addq(len, 1);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   344
    // Check if the number of blocks is greater than or equal to 32
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   345
    // If true, then 512 bytes (32 blocks) are processed at a time (code marked by label LOOP)
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   346
    // If not, 16 bytes are processed (code marked by label REMAINDER)
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   347
    bind(NO_PARTS);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   348
    movq(rbx, len);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   349
    shrq(len, 5);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   350
    jcc(Assembler::equal, REMAINDER);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   351
    movl(r13, len);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   352
    // Compute number of blocks that will be processed as 512 bytes at a time
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   353
    // Subtract this from the total number of blocks, which will then be processed by REMAINDER loop.
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   354
    shlq(r13, 5);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   355
    subq(rbx, r13);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   356
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   357
    bind(LOOP);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   358
    // Move 64 bytes of CT data into a zmm register, as a result 512 bytes of CT loaded in zmm0-7
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   359
    evmovdquq(xmm0, Address(src_addr, pos, Address::times_1, 0 * 64), Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   360
    evmovdquq(xmm1, Address(src_addr, pos, Address::times_1, 1 * 64), Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   361
    evmovdquq(xmm2, Address(src_addr, pos, Address::times_1, 2 * 64), Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   362
    evmovdquq(xmm3, Address(src_addr, pos, Address::times_1, 3 * 64), Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   363
    evmovdquq(xmm4, Address(src_addr, pos, Address::times_1, 4 * 64), Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   364
    evmovdquq(xmm5, Address(src_addr, pos, Address::times_1, 5 * 64), Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   365
    evmovdquq(xmm6, Address(src_addr, pos, Address::times_1, 6 * 64), Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   366
    evmovdquq(xmm7, Address(src_addr, pos, Address::times_1, 7 * 64), Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   367
    // Xor with the first round key
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   368
    evpxorq(xmm0, xmm0, xmm9, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   369
    evpxorq(xmm1, xmm1, xmm9, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   370
    evpxorq(xmm2, xmm2, xmm9, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   371
    evpxorq(xmm3, xmm3, xmm9, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   372
    evpxorq(xmm4, xmm4, xmm9, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   373
    evpxorq(xmm5, xmm5, xmm9, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   374
    evpxorq(xmm6, xmm6, xmm9, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   375
    evpxorq(xmm7, xmm7, xmm9, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   376
    // 9 rounds of Aesdec
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   377
    roundDec(xmm10, 7);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   378
    roundDec(xmm11, 7);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   379
    roundDec(xmm12, 7);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   380
    roundDec(xmm13, 7);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   381
    roundDec(xmm14, 7);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   382
    roundDec(xmm15, 7);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   383
    roundDec(xmm16, 7);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   384
    roundDec(xmm17, 7);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   385
    roundDec(xmm18, 7);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   386
    cmpl(rounds, 52);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   387
    jcc(Assembler::aboveEqual, AES192);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   388
    // Aesdeclast round for keysize = 128
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   389
    lastroundDec(xmm27, 7);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   390
    jmp(END_LOOP);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   391
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   392
    bind(AES192);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   393
    // 2 Additional rounds for keysize = 192
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   394
    roundDec(xmm19, 7);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   395
    roundDec(xmm20, 7);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   396
    cmpl(rounds, 60);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   397
    jcc(Assembler::aboveEqual, AES256);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   398
    // Aesdeclast round for keysize = 192
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   399
    lastroundDec(xmm27, 7);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   400
    jmp(END_LOOP);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   401
    bind(AES256);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   402
    // 2 Additional rounds and Aesdeclast for keysize = 256
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   403
    roundDec(xmm21, 7);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   404
    roundDec(xmm22, 7);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   405
    lastroundDec(xmm27, 7);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   406
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   407
    bind(END_LOOP);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   408
    // Write 512 bytes of PT to the destination
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   409
    evmovdquq(Address(dest_addr, pos, Address::times_1, 0 * 64), xmm0, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   410
    evmovdquq(Address(dest_addr, pos, Address::times_1, 1 * 64), xmm1, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   411
    evmovdquq(Address(dest_addr, pos, Address::times_1, 2 * 64), xmm2, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   412
    evmovdquq(Address(dest_addr, pos, Address::times_1, 3 * 64), xmm3, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   413
    evmovdquq(Address(dest_addr, pos, Address::times_1, 4 * 64), xmm4, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   414
    evmovdquq(Address(dest_addr, pos, Address::times_1, 5 * 64), xmm5, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   415
    evmovdquq(Address(dest_addr, pos, Address::times_1, 6 * 64), xmm6, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   416
    evmovdquq(Address(dest_addr, pos, Address::times_1, 7 * 64), xmm7, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   417
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   418
    addq(pos, 512);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   419
    decq(len);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   420
    jcc(Assembler::notEqual, LOOP);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   421
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   422
    bind(REMAINDER);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   423
    vzeroupper();
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   424
    cmpq(rbx, 0);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   425
    jcc(Assembler::equal, END);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   426
    // Process 16 bytes at a time
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   427
    bind(LOOP2);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   428
    movdqu(xmm1, Address(src_addr, pos, Address::times_1, 0));
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   429
    vpxor(xmm1, xmm1, xmm9, Assembler::AVX_128bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   430
    // xmm2 contains shuffled key for Aesdeclast operation.
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   431
    vmovdqu(xmm2, xmm27);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   432
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   433
    vaesdec(xmm1, xmm1, xmm10, Assembler::AVX_128bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   434
    vaesdec(xmm1, xmm1, xmm11, Assembler::AVX_128bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   435
    vaesdec(xmm1, xmm1, xmm12, Assembler::AVX_128bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   436
    vaesdec(xmm1, xmm1, xmm13, Assembler::AVX_128bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   437
    vaesdec(xmm1, xmm1, xmm14, Assembler::AVX_128bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   438
    vaesdec(xmm1, xmm1, xmm15, Assembler::AVX_128bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   439
    vaesdec(xmm1, xmm1, xmm16, Assembler::AVX_128bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   440
    vaesdec(xmm1, xmm1, xmm17, Assembler::AVX_128bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   441
    vaesdec(xmm1, xmm1, xmm18, Assembler::AVX_128bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   442
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   443
    cmpl(rounds, 52);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   444
    jcc(Assembler::below, LAST2);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   445
    vaesdec(xmm1, xmm1, xmm19, Assembler::AVX_128bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   446
    vaesdec(xmm1, xmm1, xmm20, Assembler::AVX_128bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   447
    cmpl(rounds, 60);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   448
    jcc(Assembler::below, LAST2);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   449
    vaesdec(xmm1, xmm1, xmm21, Assembler::AVX_128bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   450
    vaesdec(xmm1, xmm1, xmm22, Assembler::AVX_128bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   451
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   452
    bind(LAST2);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   453
    // Aesdeclast round
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   454
    vaesdeclast(xmm1, xmm1, xmm2, Assembler::AVX_128bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   455
    // Write 16 bytes of PT to destination
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   456
    movdqu(Address(dest_addr, pos, Address::times_1, 0), xmm1);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   457
    addq(pos, 16);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   458
    decq(rbx);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   459
    jcc(Assembler::notEqual, LOOP2);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   460
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   461
    bind(END);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   462
    // Zero out the round keys
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   463
    evpxorq(xmm8, xmm8, xmm8, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   464
    evpxorq(xmm9, xmm9, xmm9, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   465
    evpxorq(xmm10, xmm10, xmm10, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   466
    evpxorq(xmm11, xmm11, xmm11, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   467
    evpxorq(xmm12, xmm12, xmm12, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   468
    evpxorq(xmm13, xmm13, xmm13, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   469
    evpxorq(xmm14, xmm14, xmm14, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   470
    evpxorq(xmm15, xmm15, xmm15, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   471
    evpxorq(xmm16, xmm16, xmm16, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   472
    evpxorq(xmm17, xmm17, xmm17, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   473
    evpxorq(xmm18, xmm18, xmm18, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   474
    evpxorq(xmm27, xmm27, xmm27, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   475
    cmpl(rounds, 44);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   476
    jcc(Assembler::belowEqual, EXIT);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   477
    evpxorq(xmm19, xmm19, xmm19, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   478
    evpxorq(xmm20, xmm20, xmm20, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   479
    cmpl(rounds, 52);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   480
    jcc(Assembler::belowEqual, EXIT);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   481
    evpxorq(xmm21, xmm21, xmm21, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   482
    evpxorq(xmm22, xmm22, xmm22, Assembler::AVX_512bit);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   483
    bind(EXIT);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   484
    pop(rbx);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   485
    pop(rax); // return length
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   486
    pop(r12);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   487
    pop(r13);
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   488
}
948ac3112da8 8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents: 53017
diff changeset
   489
52990
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   490
// Multiply 128 x 128 bits, using 4 pclmulqdq operations
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   491
void MacroAssembler::schoolbookAAD(int i, Register htbl, XMMRegister data,
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   492
    XMMRegister tmp0, XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3) {
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   493
    movdqu(xmm15, Address(htbl, i * 16));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   494
    vpclmulhqlqdq(tmp3, data, xmm15); // 0x01
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   495
    vpxor(tmp2, tmp2, tmp3, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   496
    vpclmulldq(tmp3, data, xmm15); // 0x00
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   497
    vpxor(tmp0, tmp0, tmp3, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   498
    vpclmulhdq(tmp3, data, xmm15); // 0x11
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   499
    vpxor(tmp1, tmp1, tmp3, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   500
    vpclmullqhqdq(tmp3, data, xmm15); // 0x10
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   501
    vpxor(tmp2, tmp2, tmp3, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   502
}
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   503
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   504
// Multiply two 128 bit numbers resulting in a 256 bit value
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   505
// Result of the multiplication followed by reduction stored in state
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   506
void MacroAssembler::gfmul(XMMRegister tmp0, XMMRegister state) {
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   507
    const XMMRegister tmp1 = xmm4;
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   508
    const XMMRegister tmp2 = xmm5;
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   509
    const XMMRegister tmp3 = xmm6;
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   510
    const XMMRegister tmp4 = xmm7;
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   511
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   512
    vpclmulldq(tmp1, state, tmp0); //0x00  (a0 * b0)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   513
    vpclmulhdq(tmp4, state, tmp0);//0x11 (a1 * b1)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   514
    vpclmullqhqdq(tmp2, state, tmp0);//0x10 (a1 * b0)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   515
    vpclmulhqlqdq(tmp3, state, tmp0); //0x01 (a0 * b1)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   516
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   517
    vpxor(tmp2, tmp2, tmp3, Assembler::AVX_128bit); // (a0 * b1) + (a1 * b0)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   518
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   519
    vpslldq(tmp3, tmp2, 8, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   520
    vpsrldq(tmp2, tmp2, 8, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   521
    vpxor(tmp1, tmp1, tmp3, Assembler::AVX_128bit); // tmp1 and tmp4 hold the result
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   522
    vpxor(tmp4, tmp4, tmp2, Assembler::AVX_128bit); // of carryless multiplication
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   523
    // Follows the reduction technique mentioned in
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   524
    // Shift-XOR reduction described in Gueron-Kounavis May 2010
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   525
    // First phase of reduction
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   526
    //
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   527
    vpslld(xmm8, tmp1, 31, Assembler::AVX_128bit); // packed right shift shifting << 31
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   528
    vpslld(xmm9, tmp1, 30, Assembler::AVX_128bit); // packed right shift shifting << 30
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   529
    vpslld(xmm10, tmp1, 25, Assembler::AVX_128bit);// packed right shift shifting << 25
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   530
    // xor the shifted versions
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   531
    vpxor(xmm8, xmm8, xmm9, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   532
    vpxor(xmm8, xmm8, xmm10, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   533
    vpslldq(xmm9, xmm8, 12, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   534
    vpsrldq(xmm8, xmm8, 4, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   535
    vpxor(tmp1, tmp1, xmm9, Assembler::AVX_128bit);// first phase of the reduction complete
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   536
    //
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   537
    // Second phase of the reduction
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   538
    //
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   539
    vpsrld(xmm9, tmp1, 1, Assembler::AVX_128bit);// packed left shifting >> 1
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   540
    vpsrld(xmm10, tmp1, 2, Assembler::AVX_128bit);// packed left shifting >> 2
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   541
    vpsrld(xmm11, tmp1, 7, Assembler::AVX_128bit);// packed left shifting >> 7
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   542
    vpxor(xmm9, xmm9, xmm10, Assembler::AVX_128bit);// xor the shifted versions
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   543
    vpxor(xmm9, xmm9, xmm11, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   544
    vpxor(xmm9, xmm9, xmm8, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   545
    vpxor(tmp1, tmp1, xmm9, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   546
    vpxor(state, tmp4, tmp1, Assembler::AVX_128bit);// the result is in state
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   547
    ret(0);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   548
}
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   549
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   550
// This method takes the subkey after expansion as input and generates 1 * 16 power of subkey H.
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   551
// The power of H is used in reduction process for one block ghash
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   552
void MacroAssembler::generateHtbl_one_block(Register htbl) {
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   553
    const XMMRegister t = xmm13;
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   554
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   555
    // load the original subkey hash
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   556
    movdqu(t, Address(htbl, 0));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   557
    // shuffle using long swap mask
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   558
    movdqu(xmm10, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   559
    vpshufb(t, t, xmm10, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   560
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   561
    // Compute H' = GFMUL(H, 2)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   562
    vpsrld(xmm3, t, 7, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   563
    movdqu(xmm4, ExternalAddress(StubRoutines::x86::ghash_shufflemask_addr()));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   564
    vpshufb(xmm3, xmm3, xmm4, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   565
    movl(rax, 0xff00);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   566
    movdl(xmm4, rax);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   567
    vpshufb(xmm4, xmm4, xmm3, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   568
    movdqu(xmm5, ExternalAddress(StubRoutines::x86::ghash_polynomial_addr()));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   569
    vpand(xmm5, xmm5, xmm4, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   570
    vpsrld(xmm3, t, 31, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   571
    vpslld(xmm4, t, 1, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   572
    vpslldq(xmm3, xmm3, 4, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   573
    vpxor(t, xmm4, xmm3, Assembler::AVX_128bit);// t holds p(x) <<1 or H * 2
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   574
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   575
    //Adding p(x)<<1 to xmm5 which holds the reduction polynomial
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   576
    vpxor(t, t, xmm5, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   577
    movdqu(Address(htbl, 1 * 16), t); // H * 2
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   578
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   579
    ret(0);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   580
}
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   581
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   582
// This method takes the subkey after expansion as input and generates the remaining powers of subkey H.
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   583
// The power of H is used in reduction process for eight block ghash
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   584
void MacroAssembler::generateHtbl_eight_blocks(Register htbl) {
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   585
    const XMMRegister t = xmm13;
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   586
    const XMMRegister tmp0 = xmm1;
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   587
    Label GFMUL;
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   588
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   589
    movdqu(t, Address(htbl, 1 * 16));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   590
    movdqu(tmp0, t);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   591
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   592
    // tmp0 and t hold H. Now we compute powers of H by using GFMUL(H, H)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   593
    call(GFMUL, relocInfo::none);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   594
    movdqu(Address(htbl, 2 * 16), t); //H ^ 2 * 2
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   595
    call(GFMUL, relocInfo::none);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   596
    movdqu(Address(htbl, 3 * 16), t); //H ^ 3 * 2
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   597
    call(GFMUL, relocInfo::none);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   598
    movdqu(Address(htbl, 4 * 16), t); //H ^ 4 * 2
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   599
    call(GFMUL, relocInfo::none);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   600
    movdqu(Address(htbl, 5 * 16), t); //H ^ 5 * 2
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   601
    call(GFMUL, relocInfo::none);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   602
    movdqu(Address(htbl, 6 * 16), t); //H ^ 6 * 2
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   603
    call(GFMUL, relocInfo::none);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   604
    movdqu(Address(htbl, 7 * 16), t); //H ^ 7 * 2
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   605
    call(GFMUL, relocInfo::none);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   606
    movdqu(Address(htbl, 8 * 16), t); //H ^ 8 * 2
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   607
    ret(0);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   608
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   609
    bind(GFMUL);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   610
    gfmul(tmp0, t);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   611
}
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   612
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   613
// Multiblock and single block GHASH computation using Shift XOR reduction technique
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   614
// Emits code that folds `blocks` 16-byte blocks read from input_data into the
// 16-byte GHASH state stored at input_state.
//
//   input_state - address of the 16-byte state; loaded, updated and stored back
//                 (nothing is loaded or stored when blocks == 0).
//   htbl        - address of the table of multipliers. The entries at offsets
//                 1*16 and 8*16 are tested for all-zero to decide whether the
//                 1-block / 8-block table generators have to run first.
//   input_data  - address of the input; the register is advanced as blocks are consumed.
//   blocks      - number of 16-byte blocks; the register is counted down to zero.
//
// Flow: while at least 8 blocks remain they are processed 8 at a time (8 multiplies,
// one reduction); any remaining blocks are processed one at a time through GFMUL.
// Clobbers xmm0-xmm6, xmm8-xmm10, xmm14 and xmm15 directly (the called helpers are
// not visible here and may use more); xmm0, xmm1, xmm3 and xmm15 are zeroed on exit.
void MacroAssembler::avx_ghash(Register input_state, Register htbl,
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   615
    Register input_data, Register blocks) {
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   616
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   617
    // temporary variables to hold input data and input state
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   618
    const XMMRegister data = xmm1;
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   619
    const XMMRegister state = xmm0;
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   620
    // temporary variables to hold intermediate results
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   621
    const XMMRegister tmp0 = xmm3;
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   622
    const XMMRegister tmp1 = xmm4;
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   623
    const XMMRegister tmp2 = xmm5;
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   624
    const XMMRegister tmp3 = xmm6;
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   625
    // temporary variables to hold byte and long swap masks
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   626
    const XMMRegister bswap_mask = xmm2;
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   627
    const XMMRegister lswap_mask = xmm14;
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   628
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   629
    Label GENERATE_HTBL_1_BLK, GENERATE_HTBL_8_BLKS, BEGIN_PROCESS, GFMUL, BLOCK8_REDUCTION,
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   630
          ONE_BLK_INIT, PROCESS_1_BLOCK, PROCESS_8_BLOCKS, SAVE_STATE, EXIT_GHASH;
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   631
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   632
    // Nothing to hash: skip straight to the register wipe without touching the state.
    testptr(blocks, blocks);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   633
    jcc(Assembler::zero, EXIT_GHASH);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   634
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   635
    // Check if Hashtable (1*16) has been already generated
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   636
    // For anything less than 8 blocks, we generate only the first power of H.
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   637
    movdqu(tmp2, Address(htbl, 1 * 16));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   638
    // An all-zero entry is treated as "not generated yet".
    ptest(tmp2, tmp2);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   639
    jcc(Assembler::notZero, BEGIN_PROCESS);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   640
    call(GENERATE_HTBL_1_BLK, relocInfo::none);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   641
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   642
    // Shuffle the input state
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   643
    bind(BEGIN_PROCESS);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   644
    movdqu(lswap_mask, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   645
    movdqu(state, Address(input_state, 0));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   646
    vpshufb(state, state, lswap_mask, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   647
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   648
    // Fewer than 8 blocks: go directly to the one-block-at-a-time path.
    cmpl(blocks, 8);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   649
    jcc(Assembler::below, ONE_BLK_INIT);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   650
    // If we have 8 blocks or more data, then generate remaining powers of H
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   651
    movdqu(tmp2, Address(htbl, 8 * 16));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   652
    ptest(tmp2, tmp2);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   653
    jcc(Assembler::notZero, PROCESS_8_BLOCKS);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   654
    call(GENERATE_HTBL_8_BLKS, relocInfo::none);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   655
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   656
    //Do 8 multiplies followed by a reduction processing 8 blocks of data at a time
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   657
    //Each block = 16 bytes.
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   658
    bind(PROCESS_8_BLOCKS);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   659
    subl(blocks, 8);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   660
    movdqu(bswap_mask, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   661
    // Blocks are consumed from the last (#7) to the first (#0) of the group of 8.
    movdqu(data, Address(input_data, 16 * 7));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   662
    vpshufb(data, data, bswap_mask, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   663
    //Loading 1*16 as calculated powers of H required starts at that location.
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   664
    movdqu(xmm15, Address(htbl, 1 * 16));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   665
    //Perform carryless multiplication of (H*2, data block #7)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   666
    vpclmulhqlqdq(tmp2, data, xmm15);//a0 * b1
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   667
    vpclmulldq(tmp0, data, xmm15);//a0 * b0
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   668
    vpclmulhdq(tmp1, data, xmm15);//a1 * b1
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   669
    vpclmullqhqdq(tmp3, data, xmm15);//a1* b0
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   670
    vpxor(tmp2, tmp2, tmp3, Assembler::AVX_128bit);// (a0 * b1) + (a1 * b0)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   671
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   672
    movdqu(data, Address(input_data, 16 * 6));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   673
    vpshufb(data, data, bswap_mask, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   674
    // Perform carryless multiplication of (H^2 * 2, data block #6)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   675
    schoolbookAAD(2, htbl, data, tmp0, tmp1, tmp2, tmp3);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   676
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   677
    movdqu(data, Address(input_data, 16 * 5));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   678
    vpshufb(data, data, bswap_mask, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   679
    // Perform carryless multiplication of (H^3 * 2, data block #5)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   680
    schoolbookAAD(3, htbl, data, tmp0, tmp1, tmp2, tmp3);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   681
    movdqu(data, Address(input_data, 16 * 4));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   682
    vpshufb(data, data, bswap_mask, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   683
    // Perform carryless multiplication of (H^4 * 2, data block #4)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   684
    schoolbookAAD(4, htbl, data, tmp0, tmp1, tmp2, tmp3);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   685
    movdqu(data, Address(input_data, 16 * 3));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   686
    vpshufb(data, data, bswap_mask, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   687
    // Perform carryless multiplication of (H^5 * 2, data block #3)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   688
    schoolbookAAD(5, htbl, data, tmp0, tmp1, tmp2, tmp3);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   689
    movdqu(data, Address(input_data, 16 * 2));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   690
    vpshufb(data, data, bswap_mask, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   691
    // Perform carryless multiplication of (H^6 * 2, data block #2)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   692
    schoolbookAAD(6, htbl, data, tmp0, tmp1, tmp2, tmp3);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   693
    movdqu(data, Address(input_data, 16 * 1));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   694
    vpshufb(data, data, bswap_mask, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   695
    // Perform carryless multiplication of (H^7 * 2, data block #1)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   696
    schoolbookAAD(7, htbl, data, tmp0, tmp1, tmp2, tmp3);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   697
    movdqu(data, Address(input_data, 16 * 0));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   698
    // xor data block#0 with input state before performing carry-less multiplication
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   699
    vpshufb(data, data, bswap_mask, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   700
    vpxor(data, data, state, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   701
    // Perform carryless multiplication of (H^8 * 2, data block #0)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   702
    schoolbookAAD(8, htbl, data, tmp0, tmp1, tmp2, tmp3);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   703
    // Split the accumulated middle term in tmp2: its low half is added to the
    // upper 8 bytes of tmp0, its high half to the lower 8 bytes of tmp1.
    vpslldq(tmp3, tmp2, 8, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   704
    vpsrldq(tmp2, tmp2, 8, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   705
    vpxor(tmp0, tmp0, tmp3, Assembler::AVX_128bit);// tmp0, tmp1 contains aggregated results of
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   706
    vpxor(tmp1, tmp1, tmp2, Assembler::AVX_128bit);// the multiplication operation
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   707
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   708
    // we have the 2 128-bit partially accumulated multiplication results in tmp0:tmp1
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   709
    // with higher 128-bit in tmp1 and lower 128-bit in corresponding tmp0
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   710
    // Follows the reduction technique mentioned in
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   711
    // Shift-XOR reduction described in Gueron-Kounavis May 2010
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   712
    // NOTE(review): BLOCK8_REDUCTION is bound here but no jump in this function targets it.
    bind(BLOCK8_REDUCTION);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   713
    // First Phase of the reduction
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   714
    vpslld(xmm8, tmp0, 31, Assembler::AVX_128bit); // packed left shifting << 31
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   715
    vpslld(xmm9, tmp0, 30, Assembler::AVX_128bit); // packed left shifting << 30
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   716
    vpslld(xmm10, tmp0, 25, Assembler::AVX_128bit); // packed left shifting << 25
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   717
    // xor the shifted versions
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   718
    vpxor(xmm8, xmm8, xmm10, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   719
    vpxor(xmm8, xmm8, xmm9, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   720
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   721
    // xmm9 = xmm8 shifted left by 12 bytes (folded into tmp0 below);
    // xmm8 = xmm8 shifted right by 4 bytes (folded in during the second phase).
    vpslldq(xmm9, xmm8, 12, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   722
    vpsrldq(xmm8, xmm8, 4, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   723
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   724
    vpxor(tmp0, tmp0, xmm9, Assembler::AVX_128bit); // first phase of reduction is complete
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   725
    // second phase of the reduction
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   726
    vpsrld(xmm9, tmp0, 1, Assembler::AVX_128bit); // packed right shifting >> 1
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   727
    vpsrld(xmm10, tmp0, 2, Assembler::AVX_128bit); // packed right shifting >> 2
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   728
    vpsrld(tmp2, tmp0, 7, Assembler::AVX_128bit); // packed right shifting >> 7
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   729
    // xor the shifted versions
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   730
    vpxor(xmm9, xmm9, xmm10, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   731
    vpxor(xmm9, xmm9, tmp2, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   732
    vpxor(xmm9, xmm9, xmm8, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   733
    vpxor(tmp0, xmm9, tmp0, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   734
    // Final result is in state
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   735
    vpxor(state, tmp0, tmp1, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   736
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   737
    // Advance past the 8 blocks just consumed; loop again while at least 8 blocks remain.
    lea(input_data, Address(input_data, 16 * 8));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   738
    cmpl(blocks, 8);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   739
    jcc(Assembler::below, ONE_BLK_INIT);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   740
    jmp(PROCESS_8_BLOCKS);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   741
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   742
    // Since this is one block operation we will only use H * 2 i.e. the first power of H
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   743
    bind(ONE_BLK_INIT);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   744
    movdqu(tmp0, Address(htbl, 1 * 16));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   745
    movdqu(bswap_mask, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   746
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   747
    //Do one (128 bit x 128 bit) carry-less multiplication at a time followed by a reduction.
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   748
    bind(PROCESS_1_BLOCK);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   749
    // Loop exit: also taken immediately when the 8-block loop consumed everything.
    cmpl(blocks, 0);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   750
    jcc(Assembler::equal, SAVE_STATE);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   751
    subl(blocks, 1);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   752
    movdqu(data, Address(input_data, 0));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   753
    vpshufb(data, data, bswap_mask, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   754
    vpxor(state, state, data, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   755
    // gfmul(H*2, state)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   756
    call(GFMUL, relocInfo::none);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   757
    addptr(input_data, 16);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   758
    jmp(PROCESS_1_BLOCK);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   759
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   760
    // Undo the entry shuffle with the same long-swap mask and write the state back.
    bind(SAVE_STATE);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   761
    vpshufb(state, state, lswap_mask, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   762
    movdqu(Address(input_state, 0), state);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   763
    jmp(EXIT_GHASH);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   764
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   765
    // Out-of-line helpers below are only entered through call(); the jmp above keeps
    // the main path from falling into them.
    // NOTE(review): presumably gfmul() and the generateHtbl_*() emitters each end
    // with their own ret, since none is emitted here - confirm against their definitions.
    bind(GFMUL);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   766
    gfmul(tmp0, state);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   767
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   768
    bind(GENERATE_HTBL_1_BLK);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   769
    generateHtbl_one_block(htbl);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   770
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   771
    bind(GENERATE_HTBL_8_BLKS);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   772
    generateHtbl_eight_blocks(htbl);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   773
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   774
    bind(EXIT_GHASH);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   775
    // zero out xmm registers used for Htbl storage
    // (in this function: xmm0 = state, xmm1 = data, xmm3 = tmp0 which held htbl[1*16]
    // on the one-block path, xmm15 = htbl[1*16] on the 8-block path)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   776
    vpxor(xmm0, xmm0, xmm0, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   777
    vpxor(xmm1, xmm1, xmm1, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   778
    vpxor(xmm3, xmm3, xmm3, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   779
    vpxor(xmm15, xmm15, xmm15, Assembler::AVX_128bit);
53017
e10a1f7aaa13 8215354: x86_32 build failures after JDK-8214074 (Ghash optimization using AVX instructions)
shade
parents: 52990
diff changeset
   780
}
58977
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   781
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   782
// AES Counter Mode using VAES instructions
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   783
void MacroAssembler::aesctr_encrypt(Register src_addr, Register dest_addr, Register key, Register counter,
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   784
    Register len_reg, Register used, Register used_addr, Register saved_encCounter_start) {
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   785
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   786
    const Register rounds = 0;
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   787
    const Register pos = r12;
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   788
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   789
    Label PRELOOP_START, EXIT_PRELOOP, REMAINDER, REMAINDER_16, LOOP, END, EXIT, END_LOOP,
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   790
    AES192, AES256, AES192_REMAINDER16, REMAINDER16_END_LOOP, AES256_REMAINDER16,
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   791
    REMAINDER_8, REMAINDER_4, AES192_REMAINDER8, REMAINDER_LOOP, AES256_REMINDER,
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   792
    AES192_REMAINDER, END_REMAINDER_LOOP, AES256_REMAINDER8, REMAINDER8_END_LOOP,
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   793
    AES192_REMAINDER4, AES256_REMAINDER4, AES256_REMAINDER, END_REMAINDER4, EXTRACT_TAILBYTES,
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   794
    EXTRACT_TAIL_4BYTES, EXTRACT_TAIL_2BYTES, EXTRACT_TAIL_1BYTE, STORE_CTR;
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   795
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   796
    cmpl(len_reg, 0);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   797
    jcc(Assembler::belowEqual, EXIT);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   798
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   799
    movl(pos, 0);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   800
    // if the number of used encrypted counter bytes < 16,
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   801
    // XOR PT with saved encrypted counter to obtain CT
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   802
    bind(PRELOOP_START);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   803
    cmpl(used, 16);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   804
    jcc(Assembler::aboveEqual, EXIT_PRELOOP);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   805
    movb(rbx, Address(saved_encCounter_start, used));
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   806
    xorb(rbx, Address(src_addr, pos));
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   807
    movb(Address(dest_addr, pos), rbx);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   808
    addptr(pos, 1);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   809
    addptr(used, 1);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   810
    decrement(len_reg);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   811
    jmp(PRELOOP_START);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   812
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   813
    bind(EXIT_PRELOOP);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   814
    movl(Address(used_addr, 0), used);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   815
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   816
    // Calculate number of rounds i.e. 10, 12, 14,  based on key length(128, 192, 256).
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   817
    movl(rounds, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   818
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   819
    vpxor(xmm0, xmm0, xmm0, Assembler::AVX_128bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   820
    // Move initial counter value in xmm0
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   821
    movdqu(xmm0, Address(counter, 0));
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   822
    // broadcast counter value to zmm8
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   823
    evshufi64x2(xmm8, xmm0, xmm0, 0, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   824
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   825
    // load lbswap mask
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   826
    evmovdquq(xmm16, ExternalAddress(StubRoutines::x86::counter_mask_addr()), Assembler::AVX_512bit, r15);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   827
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   828
    //shuffle counter using lbswap_mask
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   829
    vpshufb(xmm8, xmm8, xmm16, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   830
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   831
    // pre-increment and propagate counter values to zmm9-zmm15 registers.
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   832
    // Linc0 increments the zmm8 by 1 (initial value being 0), Linc4 increments the counters zmm9-zmm15 by 4
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   833
    // The counter is incremented after each block i.e. 16 bytes is processed;
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   834
    // each zmm register has 4 counter values as its MSB
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   835
    // the counters are incremented in parallel
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   836
    vpaddd(xmm8, xmm8, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 64), Assembler::AVX_512bit, r15);//linc0
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   837
    vpaddd(xmm9, xmm8, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 128), Assembler::AVX_512bit, r15);//linc4(rip)
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   838
    vpaddd(xmm10, xmm9, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 128), Assembler::AVX_512bit, r15);//Linc4(rip)
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   839
    vpaddd(xmm11, xmm10, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 128), Assembler::AVX_512bit, r15);//Linc4(rip)
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   840
    vpaddd(xmm12, xmm11, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 128), Assembler::AVX_512bit, r15);//Linc4(rip)
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   841
    vpaddd(xmm13, xmm12, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 128), Assembler::AVX_512bit, r15);//Linc4(rip)
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   842
    vpaddd(xmm14, xmm13, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 128), Assembler::AVX_512bit, r15);//Linc4(rip)
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   843
    vpaddd(xmm15, xmm14, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 128), Assembler::AVX_512bit, r15);//Linc4(rip)
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   844
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   845
    // Load the linc32 mask into a zmm register. linc32 increments the counter by 32.
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   846
    evmovdquq(xmm19, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 256), Assembler::AVX_512bit, r15);//Linc32
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   847
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   848
    // xmm31 contains the key shuffle mask.
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   849
    movdqu(xmm31, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   850
    // Load key function loads 128 bit key and shuffles it. Then we broadcast the shuffled key to convert it into a 512 bit value.
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   851
    // For broadcasting the values to ZMM, vshufi64 is used instead of evbroadcasti64x2 as the source in this case is ZMM register
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   852
    // that holds shuffled key value.
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   853
    ev_load_key(xmm20, key, 0, xmm31);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   854
    ev_load_key(xmm21, key, 1 * 16, xmm31);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   855
    ev_load_key(xmm22, key, 2 * 16, xmm31);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   856
    ev_load_key(xmm23, key, 3 * 16, xmm31);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   857
    ev_load_key(xmm24, key, 4 * 16, xmm31);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   858
    ev_load_key(xmm25, key, 5 * 16, xmm31);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   859
    ev_load_key(xmm26, key, 6 * 16, xmm31);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   860
    ev_load_key(xmm27, key, 7 * 16, xmm31);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   861
    ev_load_key(xmm28, key, 8 * 16, xmm31);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   862
    ev_load_key(xmm29, key, 9 * 16, xmm31);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   863
    ev_load_key(xmm30, key, 10 * 16, xmm31);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   864
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   865
    // Process 32 blocks or 512 bytes of data
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   866
    bind(LOOP);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   867
    cmpl(len_reg, 512);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   868
    jcc(Assembler::less, REMAINDER);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   869
    subq(len_reg, 512);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   870
    // Shuffle counter and XOR it with the first round key (zmm20). Result is stored in zmm0-zmm7
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   871
    vpshufb(xmm0, xmm8, xmm16, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   872
    evpxorq(xmm0, xmm0, xmm20, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   873
    vpshufb(xmm1, xmm9, xmm16, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   874
    evpxorq(xmm1, xmm1, xmm20, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   875
    vpshufb(xmm2, xmm10, xmm16, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   876
    evpxorq(xmm2, xmm2, xmm20, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   877
    vpshufb(xmm3, xmm11, xmm16, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   878
    evpxorq(xmm3, xmm3, xmm20, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   879
    vpshufb(xmm4, xmm12, xmm16, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   880
    evpxorq(xmm4, xmm4, xmm20, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   881
    vpshufb(xmm5, xmm13, xmm16, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   882
    evpxorq(xmm5, xmm5, xmm20, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   883
    vpshufb(xmm6, xmm14, xmm16, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   884
    evpxorq(xmm6, xmm6, xmm20, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   885
    vpshufb(xmm7, xmm15, xmm16, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   886
    evpxorq(xmm7, xmm7, xmm20, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   887
    // Perform AES encode operations and put results in zmm0-zmm7.
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   888
    // This is followed by incrementing counter values in zmm8-zmm15.
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   889
    // Since we will be processing 32 blocks at a time, the counter is incremented by 32.
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   890
    roundEnc(xmm21, 7);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   891
    vpaddq(xmm8, xmm8, xmm19, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   892
    roundEnc(xmm22, 7);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   893
    vpaddq(xmm9, xmm9, xmm19, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   894
    roundEnc(xmm23, 7);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   895
    vpaddq(xmm10, xmm10, xmm19, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   896
    roundEnc(xmm24, 7);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   897
    vpaddq(xmm11, xmm11, xmm19, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   898
    roundEnc(xmm25, 7);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   899
    vpaddq(xmm12, xmm12, xmm19, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   900
    roundEnc(xmm26, 7);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   901
    vpaddq(xmm13, xmm13, xmm19, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   902
    roundEnc(xmm27, 7);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   903
    vpaddq(xmm14, xmm14, xmm19, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   904
    roundEnc(xmm28, 7);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   905
    vpaddq(xmm15, xmm15, xmm19, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   906
    roundEnc(xmm29, 7);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   907
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   908
    cmpl(rounds, 52);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   909
    jcc(Assembler::aboveEqual, AES192);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   910
    lastroundEnc(xmm30, 7);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   911
    jmp(END_LOOP);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   912
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   913
    bind(AES192);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   914
    roundEnc(xmm30, 7);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   915
    ev_load_key(xmm18, key, 11 * 16, xmm31);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   916
    roundEnc(xmm18, 7);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   917
    cmpl(rounds, 60);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   918
    jcc(Assembler::aboveEqual, AES256);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   919
    ev_load_key(xmm18, key, 12 * 16, xmm31);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   920
    lastroundEnc(xmm18, 7);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   921
    jmp(END_LOOP);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   922
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   923
    bind(AES256);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   924
    ev_load_key(xmm18, key, 12 * 16, xmm31);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   925
    roundEnc(xmm18, 7);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   926
    ev_load_key(xmm18, key, 13 * 16, xmm31);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   927
    roundEnc(xmm18, 7);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   928
    ev_load_key(xmm18, key, 14 * 16, xmm31);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   929
    lastroundEnc(xmm18, 7);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   930
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   931
    // After the AES encode rounds, the encrypted counter blocks lie in zmm0-zmm7
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   932
    // XOR the encrypted counter blocks with the input data and store the result to the destination
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   933
    bind(END_LOOP);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   934
    evpxorq(xmm0, xmm0, Address(src_addr, pos, Address::times_1, 0 * 64), Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   935
    evmovdquq(Address(dest_addr, pos, Address::times_1, 0), xmm0, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   936
    evpxorq(xmm1, xmm1, Address(src_addr, pos, Address::times_1, 1 * 64), Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   937
    evmovdquq(Address(dest_addr, pos, Address::times_1, 64), xmm1, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   938
    evpxorq(xmm2, xmm2, Address(src_addr, pos, Address::times_1, 2 * 64), Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   939
    evmovdquq(Address(dest_addr, pos, Address::times_1, 2 * 64), xmm2, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   940
    evpxorq(xmm3, xmm3, Address(src_addr, pos, Address::times_1, 3 * 64), Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   941
    evmovdquq(Address(dest_addr, pos, Address::times_1, 3 * 64), xmm3, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   942
    evpxorq(xmm4, xmm4, Address(src_addr, pos, Address::times_1, 4 * 64), Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   943
    evmovdquq(Address(dest_addr, pos, Address::times_1, 4 * 64), xmm4, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   944
    evpxorq(xmm5, xmm5, Address(src_addr, pos, Address::times_1, 5 * 64), Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   945
    evmovdquq(Address(dest_addr, pos, Address::times_1, 5 * 64), xmm5, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   946
    evpxorq(xmm6, xmm6, Address(src_addr, pos, Address::times_1, 6 * 64), Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   947
    evmovdquq(Address(dest_addr, pos, Address::times_1, 6 * 64), xmm6, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   948
    evpxorq(xmm7, xmm7, Address(src_addr, pos, Address::times_1, 7 * 64), Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   949
    evmovdquq(Address(dest_addr, pos, Address::times_1, 7 * 64), xmm7, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   950
    addq(pos, 512);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   951
    jmp(LOOP);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   952
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   953
    // Encode 256, 128, 64 or 16 bytes at a time if length is less than 512 bytes
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   954
    bind(REMAINDER);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   955
    cmpl(len_reg, 0);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   956
    jcc(Assembler::equal, END);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   957
    cmpl(len_reg, 256);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   958
    jcc(Assembler::aboveEqual, REMAINDER_16);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   959
    cmpl(len_reg, 128);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   960
    jcc(Assembler::aboveEqual, REMAINDER_8);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   961
    cmpl(len_reg, 64);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   962
    jcc(Assembler::aboveEqual, REMAINDER_4);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   963
    // At this point, we will process 16 bytes of data at a time.
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   964
    // So load xmm19 with counter increment value as 1
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   965
    evmovdquq(xmm19, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 80), Assembler::AVX_128bit, r15);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   966
    jmp(REMAINDER_LOOP);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   967
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   968
    // Each ZMM register can be used to encode 64 bytes of data, so we have 4 ZMM registers to encode 256 bytes of data
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   969
    bind(REMAINDER_16);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   970
    subq(len_reg, 256);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   971
    // As we process 16 blocks at a time, load mask for incrementing the counter value by 16
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   972
    evmovdquq(xmm19, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 320), Assembler::AVX_512bit, r15);//Linc16(rip)
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   973
    // Shuffle counter and XOR it with the first round key (zmm20)
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   974
    vpshufb(xmm0, xmm8, xmm16, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   975
    evpxorq(xmm0, xmm0, xmm20, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   976
    vpshufb(xmm1, xmm9, xmm16, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   977
    evpxorq(xmm1, xmm1, xmm20, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   978
    vpshufb(xmm2, xmm10, xmm16, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   979
    evpxorq(xmm2, xmm2, xmm20, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   980
    vpshufb(xmm3, xmm11, xmm16, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   981
    evpxorq(xmm3, xmm3, xmm20, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   982
    // Increment counter values by 16
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   983
    vpaddq(xmm8, xmm8, xmm19, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   984
    vpaddq(xmm9, xmm9, xmm19, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   985
    // AES encode rounds
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   986
    roundEnc(xmm21, 3);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   987
    roundEnc(xmm22, 3);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   988
    roundEnc(xmm23, 3);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   989
    roundEnc(xmm24, 3);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   990
    roundEnc(xmm25, 3);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   991
    roundEnc(xmm26, 3);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   992
    roundEnc(xmm27, 3);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   993
    roundEnc(xmm28, 3);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   994
    roundEnc(xmm29, 3);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   995
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   996
    cmpl(rounds, 52);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   997
    jcc(Assembler::aboveEqual, AES192_REMAINDER16);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   998
    lastroundEnc(xmm30, 3);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
   999
    jmp(REMAINDER16_END_LOOP);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1000
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1001
    bind(AES192_REMAINDER16);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1002
    roundEnc(xmm30, 3);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1003
    ev_load_key(xmm18, key, 11 * 16, xmm31);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1004
    roundEnc(xmm18, 3);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1005
    ev_load_key(xmm5, key, 12 * 16, xmm31);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1006
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1007
    cmpl(rounds, 60);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1008
    jcc(Assembler::aboveEqual, AES256_REMAINDER16);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1009
    lastroundEnc(xmm5, 3);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1010
    jmp(REMAINDER16_END_LOOP);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1011
    bind(AES256_REMAINDER16);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1012
    roundEnc(xmm5, 3);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1013
    ev_load_key(xmm6, key, 13 * 16, xmm31);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1014
    roundEnc(xmm6, 3);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1015
    ev_load_key(xmm7, key, 14 * 16, xmm31);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1016
    lastroundEnc(xmm7, 3);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1017
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1018
    // After the AES encode rounds, the encrypted counter blocks are in zmm0-zmm3
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1019
    // xor 256 bytes of PT with the encrypted counters to produce CT.
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1020
    bind(REMAINDER16_END_LOOP);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1021
    evpxorq(xmm0, xmm0, Address(src_addr, pos, Address::times_1, 0), Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1022
    evmovdquq(Address(dest_addr, pos, Address::times_1, 0), xmm0, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1023
    evpxorq(xmm1, xmm1, Address(src_addr, pos, Address::times_1, 1 * 64), Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1024
    evmovdquq(Address(dest_addr, pos, Address::times_1, 1 * 64), xmm1, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1025
    evpxorq(xmm2, xmm2, Address(src_addr, pos, Address::times_1, 2 * 64), Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1026
    evmovdquq(Address(dest_addr, pos, Address::times_1, 2 * 64), xmm2, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1027
    evpxorq(xmm3, xmm3, Address(src_addr, pos, Address::times_1, 3 * 64), Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1028
    evmovdquq(Address(dest_addr, pos, Address::times_1, 3 * 64), xmm3, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1029
    addq(pos, 256);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1030
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1031
    cmpl(len_reg, 128);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1032
    jcc(Assembler::aboveEqual, REMAINDER_8);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1033
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1034
    cmpl(len_reg, 64);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1035
    jcc(Assembler::aboveEqual, REMAINDER_4);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1036
    // load mask for incrementing the counter value by 1
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1037
    evmovdquq(xmm19, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 80), Assembler::AVX_128bit, r15);//Linc0 + 16(rip)
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1038
    jmp(REMAINDER_LOOP);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1039
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1040
    // Each ZMM register can be used to encode 64 bytes of data, so we have 2 ZMM registers to encode 128 bytes of data
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1041
    bind(REMAINDER_8);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1042
    subq(len_reg, 128);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1043
    // As we process 8 blocks at a time, load mask for incrementing the counter value by 8
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1044
    evmovdquq(xmm19, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 192), Assembler::AVX_512bit, r15);//Linc8(rip)
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1045
    // shuffle counters and xor with roundkey1
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1046
    vpshufb(xmm0, xmm8, xmm16, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1047
    evpxorq(xmm0, xmm0, xmm20, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1048
    vpshufb(xmm1, xmm9, xmm16, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1049
    evpxorq(xmm1, xmm1, xmm20, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1050
    // increment counter by 8
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1051
    vpaddq(xmm8, xmm8, xmm19, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1052
    // AES encode
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1053
    roundEnc(xmm21, 1);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1054
    roundEnc(xmm22, 1);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1055
    roundEnc(xmm23, 1);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1056
    roundEnc(xmm24, 1);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1057
    roundEnc(xmm25, 1);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1058
    roundEnc(xmm26, 1);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1059
    roundEnc(xmm27, 1);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1060
    roundEnc(xmm28, 1);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1061
    roundEnc(xmm29, 1);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1062
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1063
    cmpl(rounds, 52);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1064
    jcc(Assembler::aboveEqual, AES192_REMAINDER8);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1065
    lastroundEnc(xmm30, 1);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1066
    jmp(REMAINDER8_END_LOOP);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1067
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1068
    bind(AES192_REMAINDER8);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1069
    roundEnc(xmm30, 1);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1070
    ev_load_key(xmm18, key, 11 * 16, xmm31);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1071
    roundEnc(xmm18, 1);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1072
    ev_load_key(xmm5, key, 12 * 16, xmm31);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1073
    cmpl(rounds, 60);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1074
    jcc(Assembler::aboveEqual, AES256_REMAINDER8);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1075
    lastroundEnc(xmm5, 1);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1076
    jmp(REMAINDER8_END_LOOP);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1077
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1078
    bind(AES256_REMAINDER8);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1079
    roundEnc(xmm5, 1);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1080
    ev_load_key(xmm6, key, 13 * 16, xmm31);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1081
    roundEnc(xmm6, 1);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1082
    ev_load_key(xmm7, key, 14 * 16, xmm31);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1083
    lastroundEnc(xmm7, 1);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1084
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1085
    bind(REMAINDER8_END_LOOP);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1086
    // After the AES encode rounds, the encrypted counter blocks are in zmm0-zmm1
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1087
    // XOR PT with the encrypted counter and store as CT
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1088
    evpxorq(xmm0, xmm0, Address(src_addr, pos, Address::times_1, 0 * 64), Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1089
    evmovdquq(Address(dest_addr, pos, Address::times_1, 0 * 64), xmm0, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1090
    evpxorq(xmm1, xmm1, Address(src_addr, pos, Address::times_1, 1 * 64), Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1091
    evmovdquq(Address(dest_addr, pos, Address::times_1, 1 * 64), xmm1, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1092
    addq(pos, 128);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1093
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1094
    cmpl(len_reg, 64);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1095
    jcc(Assembler::aboveEqual, REMAINDER_4);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1096
    // load mask for incrementing the counter value by 1
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1097
    evmovdquq(xmm19, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 80), Assembler::AVX_128bit, r15);//Linc0 + 16(rip)
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1098
    jmp(REMAINDER_LOOP);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1099
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1100
    // Each ZMM register can be used to encode 64 bytes of data, so we have 1 ZMM register used in this block of code
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1101
    bind(REMAINDER_4);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1102
    subq(len_reg, 64);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1103
    // As we process 4 blocks at a time, load mask for incrementing the counter value by 4
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1104
    evmovdquq(xmm19, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 128), Assembler::AVX_512bit, r15);//Linc4(rip)
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1105
    // XOR counter with first roundkey
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1106
    vpshufb(xmm0, xmm8, xmm16, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1107
    evpxorq(xmm0, xmm0, xmm20, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1108
    // Increment counter
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1109
    vpaddq(xmm8, xmm8, xmm19, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1110
    vaesenc(xmm0, xmm0, xmm21, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1111
    vaesenc(xmm0, xmm0, xmm22, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1112
    vaesenc(xmm0, xmm0, xmm23, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1113
    vaesenc(xmm0, xmm0, xmm24, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1114
    vaesenc(xmm0, xmm0, xmm25, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1115
    vaesenc(xmm0, xmm0, xmm26, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1116
    vaesenc(xmm0, xmm0, xmm27, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1117
    vaesenc(xmm0, xmm0, xmm28, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1118
    vaesenc(xmm0, xmm0, xmm29, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1119
    cmpl(rounds, 52);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1120
    jcc(Assembler::aboveEqual, AES192_REMAINDER4);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1121
    vaesenclast(xmm0, xmm0, xmm30, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1122
    jmp(END_REMAINDER4);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1123
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1124
    bind(AES192_REMAINDER4);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1125
    vaesenc(xmm0, xmm0, xmm30, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1126
    ev_load_key(xmm18, key, 11 * 16, xmm31);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1127
    vaesenc(xmm0, xmm0, xmm18, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1128
    ev_load_key(xmm5, key, 12 * 16, xmm31);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1129
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1130
    cmpl(rounds, 60);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1131
    jcc(Assembler::aboveEqual, AES256_REMAINDER4);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1132
    vaesenclast(xmm0, xmm0, xmm5, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1133
    jmp(END_REMAINDER4);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1134
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1135
    bind(AES256_REMAINDER4);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1136
    vaesenc(xmm0, xmm0, xmm5, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1137
    ev_load_key(xmm6, key, 13 * 16, xmm31);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1138
    vaesenc(xmm0, xmm0, xmm6, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1139
    ev_load_key(xmm7, key, 14 * 16, xmm31);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1140
    vaesenclast(xmm0, xmm0, xmm7, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1141
    // After the AES encode rounds, the encrypted counter blocks are in zmm0.
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1142
    // XOR encrypted block cipher with PT and store 64 bytes of ciphertext
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1143
    bind(END_REMAINDER4);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1144
    evpxorq(xmm0, xmm0, Address(src_addr, pos, Address::times_1, 0 * 64), Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1145
    evmovdquq(Address(dest_addr, pos, Address::times_1, 0), xmm0, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1146
    addq(pos, 64);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1147
    // load mask for incrementing the counter value by 1
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1148
    evmovdquq(xmm19, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 80), Assembler::AVX_128bit, r15);//Linc0 + 16(rip)
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1149
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1150
    // For a single block, the AES rounds start here.
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1151
    bind(REMAINDER_LOOP);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1152
    cmpl(len_reg, 0);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1153
    jcc(Assembler::belowEqual, END);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1154
    // XOR counter with first roundkey
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1155
    vpshufb(xmm0, xmm8, xmm16, Assembler::AVX_128bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1156
    evpxorq(xmm0, xmm0, xmm20, Assembler::AVX_128bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1157
    vaesenc(xmm0, xmm0, xmm21, Assembler::AVX_128bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1158
    // Increment counter by 1
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1159
    vpaddq(xmm8, xmm8, xmm19, Assembler::AVX_128bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1160
    vaesenc(xmm0, xmm0, xmm22, Assembler::AVX_128bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1161
    vaesenc(xmm0, xmm0, xmm23, Assembler::AVX_128bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1162
    vaesenc(xmm0, xmm0, xmm24, Assembler::AVX_128bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1163
    vaesenc(xmm0, xmm0, xmm25, Assembler::AVX_128bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1164
    vaesenc(xmm0, xmm0, xmm26, Assembler::AVX_128bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1165
    vaesenc(xmm0, xmm0, xmm27, Assembler::AVX_128bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1166
    vaesenc(xmm0, xmm0, xmm28, Assembler::AVX_128bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1167
    vaesenc(xmm0, xmm0, xmm29, Assembler::AVX_128bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1168
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1169
    cmpl(rounds, 52);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1170
    jcc(Assembler::aboveEqual, AES192_REMAINDER);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1171
    vaesenclast(xmm0, xmm0, xmm30, Assembler::AVX_128bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1172
    jmp(END_REMAINDER_LOOP);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1173
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1174
    bind(AES192_REMAINDER);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1175
    vaesenc(xmm0, xmm0, xmm30, Assembler::AVX_128bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1176
    ev_load_key(xmm18, key, 11 * 16, xmm31);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1177
    vaesenc(xmm0, xmm0, xmm18, Assembler::AVX_128bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1178
    ev_load_key(xmm5, key, 12 * 16, xmm31);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1179
    cmpl(rounds, 60);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1180
    jcc(Assembler::aboveEqual, AES256_REMAINDER);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1181
    vaesenclast(xmm0, xmm0, xmm5, Assembler::AVX_128bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1182
    jmp(END_REMAINDER_LOOP);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1183
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1184
    bind(AES256_REMAINDER);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1185
    vaesenc(xmm0, xmm0, xmm5, Assembler::AVX_128bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1186
    ev_load_key(xmm6, key, 13 * 16, xmm31);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1187
    vaesenc(xmm0, xmm0, xmm6, Assembler::AVX_128bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1188
    ev_load_key(xmm7, key, 14 * 16, xmm31);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1189
    vaesenclast(xmm0, xmm0, xmm7, Assembler::AVX_128bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1190
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1191
    bind(END_REMAINDER_LOOP);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1192
    // If the length register is less than the blockSize i.e. 16
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1193
    // then we store only those bytes of the CT to the destination
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1194
    // corresponding to the length register value
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1195
    // extracting the exact number of bytes is handled by EXTRACT_TAILBYTES
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1196
    cmpl(len_reg, 16);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1197
    jcc(Assembler::less, EXTRACT_TAILBYTES);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1198
    subl(len_reg, 16);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1199
    // After AES encode rounds, the encrypted block cipher lies in xmm0.
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1200
    // If the length register is equal to 16 bytes, store CT in dest after XOR operation.
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1201
    evpxorq(xmm0, xmm0, Address(src_addr, pos, Address::times_1, 0), Assembler::AVX_128bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1202
    evmovdquq(Address(dest_addr, pos, Address::times_1, 0), xmm0, Assembler::AVX_128bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1203
    addl(pos, 16);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1204
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1205
    jmp(REMAINDER_LOOP);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1206
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1207
    bind(EXTRACT_TAILBYTES);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1208
    // Save encrypted counter value in xmm0 for next invocation, before XOR operation
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1209
    movdqu(Address(saved_encCounter_start, 0), xmm0);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1210
    // XOR encrypted block cipher in xmm0 with PT to produce CT
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1211
    evpxorq(xmm0, xmm0, Address(src_addr, pos, Address::times_1, 0), Assembler::AVX_128bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1212
    // extract up to 15 bytes of CT from xmm0 as specified by length register
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1213
    testptr(len_reg, 8);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1214
    jcc(Assembler::zero, EXTRACT_TAIL_4BYTES);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1215
    pextrq(Address(dest_addr, pos), xmm0, 0);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1216
    psrldq(xmm0, 8);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1217
    addl(pos, 8);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1218
    bind(EXTRACT_TAIL_4BYTES);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1219
    testptr(len_reg, 4);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1220
    jcc(Assembler::zero, EXTRACT_TAIL_2BYTES);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1221
    pextrd(Address(dest_addr, pos), xmm0, 0);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1222
    psrldq(xmm0, 4);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1223
    addq(pos, 4);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1224
    bind(EXTRACT_TAIL_2BYTES);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1225
    testptr(len_reg, 2);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1226
    jcc(Assembler::zero, EXTRACT_TAIL_1BYTE);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1227
    pextrw(Address(dest_addr, pos), xmm0, 0);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1228
    psrldq(xmm0, 2);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1229
    addl(pos, 2);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1230
    bind(EXTRACT_TAIL_1BYTE);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1231
    testptr(len_reg, 1);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1232
    jcc(Assembler::zero, END);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1233
    pextrb(Address(dest_addr, pos), xmm0, 0);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1234
    addl(pos, 1);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1235
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1236
    bind(END);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1237
    // If there are no tail bytes, store counter value and exit
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1238
    cmpl(len_reg, 0);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1239
    jcc(Assembler::equal, STORE_CTR);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1240
    movl(Address(used_addr, 0), len_reg);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1241
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1242
    bind(STORE_CTR);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1243
    //shuffle updated counter and store it
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1244
    vpshufb(xmm8, xmm8, xmm16, Assembler::AVX_128bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1245
    movdqu(Address(counter, 0), xmm8);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1246
    // Zero out counter and key registers
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1247
    evpxorq(xmm8, xmm8, xmm8, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1248
    evpxorq(xmm20, xmm20, xmm20, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1249
    evpxorq(xmm21, xmm21, xmm21, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1250
    evpxorq(xmm22, xmm22, xmm22, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1251
    evpxorq(xmm23, xmm23, xmm23, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1252
    evpxorq(xmm24, xmm24, xmm24, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1253
    evpxorq(xmm25, xmm25, xmm25, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1254
    evpxorq(xmm26, xmm26, xmm26, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1255
    evpxorq(xmm27, xmm27, xmm27, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1256
    evpxorq(xmm28, xmm28, xmm28, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1257
    evpxorq(xmm29, xmm29, xmm29, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1258
    evpxorq(xmm30, xmm30, xmm30, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1259
    cmpl(rounds, 44);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1260
    jcc(Assembler::belowEqual, EXIT);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1261
    evpxorq(xmm18, xmm18, xmm18, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1262
    evpxorq(xmm5, xmm5, xmm5, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1263
    cmpl(rounds, 52);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1264
    jcc(Assembler::belowEqual, EXIT);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1265
    evpxorq(xmm6, xmm6, xmm6, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1266
    evpxorq(xmm7, xmm7, xmm7, Assembler::AVX_512bit);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1267
    bind(EXIT);
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1268
}
c6a789f495fe 8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents: 57786
diff changeset
  1269
53017
e10a1f7aaa13 8215354: x86_32 build failures after JDK-8214074 (Ghash optimization using AVX instructions)
shade
parents: 52990
diff changeset
  1270
#endif // _LP64