author | kvn |
Thu, 07 Nov 2019 17:47:22 -0800 | |
changeset 58977 | c6a789f495fe |
parent 57786 | 948ac3112da8 |
permissions | -rw-r--r-- |
52990 | 1 |
/* |
58977
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
2 |
* Copyright (c) 2019, Intel Corporation. |
52990 | 3 |
* |
4 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
5 |
* |
|
6 |
* This code is free software; you can redistribute it and/or modify it |
|
7 |
* under the terms of the GNU General Public License version 2 only, as |
|
8 |
* published by the Free Software Foundation. |
|
9 |
* |
|
10 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
|
11 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
12 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
13 |
* version 2 for more details (a copy is included in the LICENSE file that |
|
14 |
* accompanied this code). |
|
15 |
* |
|
16 |
* You should have received a copy of the GNU General Public License version |
|
17 |
* 2 along with this work; if not, write to the Free Software Foundation, |
|
18 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
19 |
* |
|
20 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
21 |
* or visit www.oracle.com if you need additional information or have any |
|
22 |
* questions. |
|
23 |
* |
|
24 |
*/ |
|
25 |
||
26 |
#include "precompiled.hpp" |
|
27 |
#include "asm/assembler.hpp" |
|
28 |
#include "asm/assembler.inline.hpp" |
|
29 |
#include "runtime/stubRoutines.hpp" |
|
30 |
#include "macroAssembler_x86.hpp" |
|
31 |
||
53017
e10a1f7aaa13
8215354: x86_32 build failures after JDK-8214074 (Ghash optimization using AVX instructions)
shade
parents:
52990
diff
changeset
|
32 |
#ifdef _LP64 |
57786
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
33 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
34 |
// Emit one AES encryption round (vaesenc) with round key `key` for each of
// the registers numbered 0 through rnum (inclusive), using 512-bit vectors.
// Each register is updated in place.
void MacroAssembler::roundEnc(XMMRegister key, int rnum) {
  int reg_idx = 0;
  while (reg_idx <= rnum) {
    const XMMRegister state = as_XMMRegister(reg_idx);
    vaesenc(state, state, key, Assembler::AVX_512bit);
    ++reg_idx;
  }
}
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
39 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
40 |
// Emit the final AES encryption round (vaesenclast) with round key `key` for
// each of the registers numbered 0 through rnum (inclusive), using 512-bit
// vectors. Each register is updated in place.
void MacroAssembler::lastroundEnc(XMMRegister key, int rnum) {
  int reg_idx = 0;
  while (reg_idx <= rnum) {
    const XMMRegister state = as_XMMRegister(reg_idx);
    vaesenclast(state, state, key, Assembler::AVX_512bit);
    ++reg_idx;
  }
}
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
45 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
46 |
// Emit one AES decryption round (vaesdec) with round key `key` for each of
// the registers numbered 0 through rnum (inclusive), using 512-bit vectors.
// Each register is updated in place.
void MacroAssembler::roundDec(XMMRegister key, int rnum) {
  int reg_idx = 0;
  while (reg_idx <= rnum) {
    const XMMRegister state = as_XMMRegister(reg_idx);
    vaesdec(state, state, key, Assembler::AVX_512bit);
    ++reg_idx;
  }
}
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
51 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
52 |
// Emit the final AES decryption round (vaesdeclast) with round key `key` for
// each of the registers numbered 0 through rnum (inclusive), using 512-bit
// vectors. Each register is updated in place.
void MacroAssembler::lastroundDec(XMMRegister key, int rnum) {
  int reg_idx = 0;
  while (reg_idx <= rnum) {
    const XMMRegister state = as_XMMRegister(reg_idx);
    vaesdeclast(state, state, key, Assembler::AVX_512bit);
    ++reg_idx;
  }
}
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
57 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
58 |
// Load key and shuffle operation |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
59 |
void MacroAssembler::ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) { |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
60 |
movdqu(xmmdst, Address(key, offset)); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
61 |
if (xmm_shuf_mask != NULL) { |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
62 |
pshufb(xmmdst, xmm_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
63 |
} else { |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
64 |
pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
65 |
} |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
66 |
evshufi64x2(xmmdst, xmmdst, xmmdst, 0x0, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
67 |
} |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
68 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
69 |
// AES-ECB Encrypt Operation |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
70 |
void MacroAssembler::aesecb_encrypt(Register src_addr, Register dest_addr, Register key, Register len) { |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
71 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
72 |
const Register pos = rax; |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
73 |
const Register rounds = r12; |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
74 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
75 |
Label NO_PARTS, LOOP, Loop_start, LOOP2, AES192, END_LOOP, AES256, REMAINDER, LAST2, END, KEY_192, KEY_256, EXIT; |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
76 |
push(r13); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
77 |
push(r12); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
78 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
79 |
// For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
80 |
// context for the registers used, where all instructions below are using 128-bit mode |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
81 |
// On EVEX without VL and BW, these instructions will all be AVX. |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
82 |
if (VM_Version::supports_avx512vlbw()) { |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
83 |
movl(rax, 0xffff); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
84 |
kmovql(k1, rax); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
85 |
} |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
86 |
push(len); // Save |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
87 |
push(rbx); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
88 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
89 |
vzeroupper(); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
90 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
91 |
xorptr(pos, pos); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
92 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
93 |
// Calculate number of rounds based on key length(128, 192, 256):44 for 10-rounds, 52 for 12-rounds, 60 for 14-rounds |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
94 |
movl(rounds, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
95 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
96 |
// Load Key shuf mask |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
97 |
const XMMRegister xmm_key_shuf_mask = xmm31; // used temporarily to swap key bytes up front |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
98 |
movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
99 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
100 |
// Load and shuffle key based on number of rounds |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
101 |
ev_load_key(xmm8, key, 0 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
102 |
ev_load_key(xmm9, key, 1 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
103 |
ev_load_key(xmm10, key, 2 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
104 |
ev_load_key(xmm23, key, 3 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
105 |
ev_load_key(xmm12, key, 4 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
106 |
ev_load_key(xmm13, key, 5 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
107 |
ev_load_key(xmm14, key, 6 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
108 |
ev_load_key(xmm15, key, 7 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
109 |
ev_load_key(xmm16, key, 8 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
110 |
ev_load_key(xmm17, key, 9 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
111 |
ev_load_key(xmm24, key, 10 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
112 |
cmpl(rounds, 52); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
113 |
jcc(Assembler::greaterEqual, KEY_192); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
114 |
jmp(Loop_start); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
115 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
116 |
bind(KEY_192); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
117 |
ev_load_key(xmm19, key, 11 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
118 |
ev_load_key(xmm20, key, 12 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
119 |
cmpl(rounds, 60); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
120 |
jcc(Assembler::equal, KEY_256); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
121 |
jmp(Loop_start); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
122 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
123 |
bind(KEY_256); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
124 |
ev_load_key(xmm21, key, 13 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
125 |
ev_load_key(xmm22, key, 14 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
126 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
127 |
bind(Loop_start); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
128 |
movq(rbx, len); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
129 |
// Divide length by 16 to convert it to number of blocks |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
130 |
shrq(len, 4); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
131 |
shlq(rbx, 60); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
132 |
jcc(Assembler::equal, NO_PARTS); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
133 |
addq(len, 1); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
134 |
// Check if number of blocks is greater than or equal to 32 |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
135 |
// If true, 512 bytes are processed at a time (code marked by label LOOP) |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
136 |
// If not, 16 bytes are processed (code marked by REMAINDER label) |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
137 |
bind(NO_PARTS); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
138 |
movq(rbx, len); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
139 |
shrq(len, 5); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
140 |
jcc(Assembler::equal, REMAINDER); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
141 |
movl(r13, len); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
142 |
// Compute number of blocks that will be processed 512 bytes at a time |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
143 |
// Subtract this from the total number of blocks which will then be processed by REMAINDER loop |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
144 |
shlq(r13, 5); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
145 |
subq(rbx, r13); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
146 |
//Begin processing 512 bytes |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
147 |
bind(LOOP); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
148 |
// Move 64 bytes of PT data into a zmm register, as a result 512 bytes of PT loaded in zmm0-7 |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
149 |
evmovdquq(xmm0, Address(src_addr, pos, Address::times_1, 0 * 64), Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
150 |
evmovdquq(xmm1, Address(src_addr, pos, Address::times_1, 1 * 64), Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
151 |
evmovdquq(xmm2, Address(src_addr, pos, Address::times_1, 2 * 64), Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
152 |
evmovdquq(xmm3, Address(src_addr, pos, Address::times_1, 3 * 64), Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
153 |
evmovdquq(xmm4, Address(src_addr, pos, Address::times_1, 4 * 64), Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
154 |
evmovdquq(xmm5, Address(src_addr, pos, Address::times_1, 5 * 64), Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
155 |
evmovdquq(xmm6, Address(src_addr, pos, Address::times_1, 6 * 64), Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
156 |
evmovdquq(xmm7, Address(src_addr, pos, Address::times_1, 7 * 64), Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
157 |
// Xor with the first round key |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
158 |
evpxorq(xmm0, xmm0, xmm8, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
159 |
evpxorq(xmm1, xmm1, xmm8, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
160 |
evpxorq(xmm2, xmm2, xmm8, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
161 |
evpxorq(xmm3, xmm3, xmm8, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
162 |
evpxorq(xmm4, xmm4, xmm8, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
163 |
evpxorq(xmm5, xmm5, xmm8, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
164 |
evpxorq(xmm6, xmm6, xmm8, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
165 |
evpxorq(xmm7, xmm7, xmm8, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
166 |
// 9 Aes encode round operations |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
167 |
roundEnc(xmm9, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
168 |
roundEnc(xmm10, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
169 |
roundEnc(xmm23, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
170 |
roundEnc(xmm12, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
171 |
roundEnc(xmm13, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
172 |
roundEnc(xmm14, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
173 |
roundEnc(xmm15, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
174 |
roundEnc(xmm16, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
175 |
roundEnc(xmm17, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
176 |
cmpl(rounds, 52); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
177 |
jcc(Assembler::aboveEqual, AES192); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
178 |
// Aesenclast round operation for keysize = 128 |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
179 |
lastroundEnc(xmm24, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
180 |
jmp(END_LOOP); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
181 |
// Additional 2 rounds of Aesenc operation for keysize = 192 |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
182 |
bind(AES192); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
183 |
roundEnc(xmm24, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
184 |
roundEnc(xmm19, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
185 |
cmpl(rounds, 60); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
186 |
jcc(Assembler::aboveEqual, AES256); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
187 |
// Aesenclast round for keysize = 192 |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
188 |
lastroundEnc(xmm20, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
189 |
jmp(END_LOOP); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
190 |
// 2 rounds of Aesenc operation and Aesenclast for keysize = 256 |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
191 |
bind(AES256); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
192 |
roundEnc(xmm20, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
193 |
roundEnc(xmm21, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
194 |
lastroundEnc(xmm22, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
195 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
196 |
bind(END_LOOP); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
197 |
// Move 512 bytes of CT to destination |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
198 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 0 * 64), xmm0, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
199 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 1 * 64), xmm1, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
200 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 2 * 64), xmm2, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
201 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 3 * 64), xmm3, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
202 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 4 * 64), xmm4, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
203 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 5 * 64), xmm5, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
204 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 6 * 64), xmm6, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
205 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 7 * 64), xmm7, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
206 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
207 |
addq(pos, 512); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
208 |
decq(len); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
209 |
jcc(Assembler::notEqual, LOOP); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
210 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
211 |
bind(REMAINDER); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
212 |
vzeroupper(); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
213 |
cmpq(rbx, 0); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
214 |
jcc(Assembler::equal, END); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
215 |
// Process 16 bytes at a time |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
216 |
bind(LOOP2); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
217 |
movdqu(xmm1, Address(src_addr, pos, Address::times_1, 0)); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
218 |
vpxor(xmm1, xmm1, xmm8, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
219 |
// xmm2 contains shuffled key for Aesenclast operation. |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
220 |
vmovdqu(xmm2, xmm24); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
221 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
222 |
vaesenc(xmm1, xmm1, xmm9, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
223 |
vaesenc(xmm1, xmm1, xmm10, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
224 |
vaesenc(xmm1, xmm1, xmm23, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
225 |
vaesenc(xmm1, xmm1, xmm12, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
226 |
vaesenc(xmm1, xmm1, xmm13, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
227 |
vaesenc(xmm1, xmm1, xmm14, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
228 |
vaesenc(xmm1, xmm1, xmm15, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
229 |
vaesenc(xmm1, xmm1, xmm16, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
230 |
vaesenc(xmm1, xmm1, xmm17, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
231 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
232 |
cmpl(rounds, 52); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
233 |
jcc(Assembler::below, LAST2); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
234 |
vmovdqu(xmm2, xmm20); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
235 |
vaesenc(xmm1, xmm1, xmm24, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
236 |
vaesenc(xmm1, xmm1, xmm19, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
237 |
cmpl(rounds, 60); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
238 |
jcc(Assembler::below, LAST2); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
239 |
vmovdqu(xmm2, xmm22); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
240 |
vaesenc(xmm1, xmm1, xmm20, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
241 |
vaesenc(xmm1, xmm1, xmm21, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
242 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
243 |
bind(LAST2); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
244 |
// Aesenclast round |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
245 |
vaesenclast(xmm1, xmm1, xmm2, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
246 |
// Write 16 bytes of CT to destination |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
247 |
movdqu(Address(dest_addr, pos, Address::times_1, 0), xmm1); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
248 |
addq(pos, 16); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
249 |
decq(rbx); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
250 |
jcc(Assembler::notEqual, LOOP2); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
251 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
252 |
bind(END); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
253 |
// Zero out the round keys |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
254 |
evpxorq(xmm8, xmm8, xmm8, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
255 |
evpxorq(xmm9, xmm9, xmm9, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
256 |
evpxorq(xmm10, xmm10, xmm10, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
257 |
evpxorq(xmm23, xmm23, xmm23, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
258 |
evpxorq(xmm12, xmm12, xmm12, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
259 |
evpxorq(xmm13, xmm13, xmm13, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
260 |
evpxorq(xmm14, xmm14, xmm14, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
261 |
evpxorq(xmm15, xmm15, xmm15, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
262 |
evpxorq(xmm16, xmm16, xmm16, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
263 |
evpxorq(xmm17, xmm17, xmm17, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
264 |
evpxorq(xmm24, xmm24, xmm24, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
265 |
cmpl(rounds, 44); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
266 |
jcc(Assembler::belowEqual, EXIT); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
267 |
evpxorq(xmm19, xmm19, xmm19, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
268 |
evpxorq(xmm20, xmm20, xmm20, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
269 |
cmpl(rounds, 52); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
270 |
jcc(Assembler::belowEqual, EXIT); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
271 |
evpxorq(xmm21, xmm21, xmm21, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
272 |
evpxorq(xmm22, xmm22, xmm22, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
273 |
bind(EXIT); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
274 |
pop(rbx); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
275 |
pop(rax); // return length |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
276 |
pop(r12); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
277 |
pop(r13); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
278 |
} |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
279 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
280 |
// AES-ECB Decrypt Operation |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
281 |
void MacroAssembler::aesecb_decrypt(Register src_addr, Register dest_addr, Register key, Register len) { |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
282 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
283 |
Label NO_PARTS, LOOP, Loop_start, LOOP2, AES192, END_LOOP, AES256, REMAINDER, LAST2, END, KEY_192, KEY_256, EXIT; |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
284 |
const Register pos = rax; |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
285 |
const Register rounds = r12; |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
286 |
push(r13); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
287 |
push(r12); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
288 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
289 |
// For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
290 |
// context for the registers used, where all instructions below are using 128-bit mode |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
291 |
// On EVEX without VL and BW, these instructions will all be AVX. |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
292 |
if (VM_Version::supports_avx512vlbw()) { |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
293 |
movl(rax, 0xffff); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
294 |
kmovql(k1, rax); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
295 |
} |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
296 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
297 |
push(len); // Save |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
298 |
push(rbx); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
299 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
300 |
vzeroupper(); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
301 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
302 |
xorptr(pos, pos); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
303 |
// Load the expanded key length, which selects the number of rounds: 44 for 10 rounds (128-bit key), 52 for 12 rounds (192-bit key), 60 for 14 rounds (256-bit key) |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
304 |
movl(rounds, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
305 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
306 |
// Load Key shuf mask |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
307 |
const XMMRegister xmm_key_shuf_mask = xmm31; // used temporarily to swap key bytes up front |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
308 |
movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
309 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
310 |
// Load and shuffle round keys. The java expanded key ordering is rotated one position in decryption. |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
311 |
// So the first round key is loaded from 1*16 here and last round key is loaded from 0*16 |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
312 |
ev_load_key(xmm9, key, 1 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
313 |
ev_load_key(xmm10, key, 2 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
314 |
ev_load_key(xmm11, key, 3 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
315 |
ev_load_key(xmm12, key, 4 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
316 |
ev_load_key(xmm13, key, 5 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
317 |
ev_load_key(xmm14, key, 6 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
318 |
ev_load_key(xmm15, key, 7 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
319 |
ev_load_key(xmm16, key, 8 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
320 |
ev_load_key(xmm17, key, 9 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
321 |
ev_load_key(xmm18, key, 10 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
322 |
ev_load_key(xmm27, key, 0 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
323 |
cmpl(rounds, 52); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
324 |
jcc(Assembler::greaterEqual, KEY_192); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
325 |
jmp(Loop_start); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
326 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
327 |
bind(KEY_192); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
328 |
ev_load_key(xmm19, key, 11 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
329 |
ev_load_key(xmm20, key, 12 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
330 |
cmpl(rounds, 60); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
331 |
jcc(Assembler::equal, KEY_256); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
332 |
jmp(Loop_start); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
333 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
334 |
bind(KEY_256); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
335 |
ev_load_key(xmm21, key, 13 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
336 |
ev_load_key(xmm22, key, 14 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
337 |
bind(Loop_start); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
338 |
movq(rbx, len); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
339 |
// Convert input length to number of blocks |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
340 |
shrq(len, 4); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
341 |
shlq(rbx, 60); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
342 |
jcc(Assembler::equal, NO_PARTS); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
343 |
addq(len, 1); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
344 |
// Check if the number of blocks is greater than or equal to 32 |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
345 |
// If true, 512 bytes (32 blocks) are processed at a time (code marked by label LOOP) |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
346 |
// If not, 16 bytes are processed (code marked by label REMAINDER) |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
347 |
bind(NO_PARTS); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
348 |
movq(rbx, len); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
349 |
shrq(len, 5); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
350 |
jcc(Assembler::equal, REMAINDER); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
351 |
movl(r13, len); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
352 |
// Compute number of blocks that will be processed as 512 bytes at a time |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
353 |
// Subtract this from the total number of blocks, which will then be processed by REMAINDER loop. |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
354 |
shlq(r13, 5); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
355 |
subq(rbx, r13); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
356 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
357 |
bind(LOOP); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
358 |
// Move 64 bytes of CT data into each zmm register; as a result, 512 bytes of CT are loaded in zmm0-7 |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
359 |
evmovdquq(xmm0, Address(src_addr, pos, Address::times_1, 0 * 64), Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
360 |
evmovdquq(xmm1, Address(src_addr, pos, Address::times_1, 1 * 64), Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
361 |
evmovdquq(xmm2, Address(src_addr, pos, Address::times_1, 2 * 64), Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
362 |
evmovdquq(xmm3, Address(src_addr, pos, Address::times_1, 3 * 64), Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
363 |
evmovdquq(xmm4, Address(src_addr, pos, Address::times_1, 4 * 64), Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
364 |
evmovdquq(xmm5, Address(src_addr, pos, Address::times_1, 5 * 64), Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
365 |
evmovdquq(xmm6, Address(src_addr, pos, Address::times_1, 6 * 64), Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
366 |
evmovdquq(xmm7, Address(src_addr, pos, Address::times_1, 7 * 64), Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
367 |
// Xor with the first round key |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
368 |
evpxorq(xmm0, xmm0, xmm9, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
369 |
evpxorq(xmm1, xmm1, xmm9, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
370 |
evpxorq(xmm2, xmm2, xmm9, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
371 |
evpxorq(xmm3, xmm3, xmm9, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
372 |
evpxorq(xmm4, xmm4, xmm9, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
373 |
evpxorq(xmm5, xmm5, xmm9, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
374 |
evpxorq(xmm6, xmm6, xmm9, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
375 |
evpxorq(xmm7, xmm7, xmm9, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
376 |
// 9 rounds of Aesdec |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
377 |
roundDec(xmm10, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
378 |
roundDec(xmm11, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
379 |
roundDec(xmm12, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
380 |
roundDec(xmm13, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
381 |
roundDec(xmm14, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
382 |
roundDec(xmm15, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
383 |
roundDec(xmm16, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
384 |
roundDec(xmm17, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
385 |
roundDec(xmm18, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
386 |
cmpl(rounds, 52); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
387 |
jcc(Assembler::aboveEqual, AES192); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
388 |
// Aesdeclast round for keysize = 128 |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
389 |
lastroundDec(xmm27, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
390 |
jmp(END_LOOP); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
391 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
392 |
bind(AES192); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
393 |
// 2 Additional rounds for keysize = 192 |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
394 |
roundDec(xmm19, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
395 |
roundDec(xmm20, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
396 |
cmpl(rounds, 60); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
397 |
jcc(Assembler::aboveEqual, AES256); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
398 |
// Aesdeclast round for keysize = 192 |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
399 |
lastroundDec(xmm27, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
400 |
jmp(END_LOOP); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
401 |
bind(AES256); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
402 |
// 2 Additional rounds and Aesdeclast for keysize = 256 |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
403 |
roundDec(xmm21, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
404 |
roundDec(xmm22, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
405 |
lastroundDec(xmm27, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
406 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
407 |
bind(END_LOOP); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
408 |
// Write 512 bytes of PT to the destination |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
409 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 0 * 64), xmm0, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
410 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 1 * 64), xmm1, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
411 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 2 * 64), xmm2, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
412 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 3 * 64), xmm3, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
413 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 4 * 64), xmm4, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
414 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 5 * 64), xmm5, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
415 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 6 * 64), xmm6, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
416 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 7 * 64), xmm7, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
417 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
418 |
addq(pos, 512); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
419 |
decq(len); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
420 |
jcc(Assembler::notEqual, LOOP); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
421 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
422 |
bind(REMAINDER); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
423 |
vzeroupper(); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
424 |
cmpq(rbx, 0); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
425 |
jcc(Assembler::equal, END); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
426 |
// Process 16 bytes at a time |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
427 |
bind(LOOP2); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
428 |
movdqu(xmm1, Address(src_addr, pos, Address::times_1, 0)); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
429 |
vpxor(xmm1, xmm1, xmm9, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
430 |
// xmm2 contains shuffled key for Aesdeclast operation. |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
431 |
vmovdqu(xmm2, xmm27); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
432 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
433 |
vaesdec(xmm1, xmm1, xmm10, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
434 |
vaesdec(xmm1, xmm1, xmm11, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
435 |
vaesdec(xmm1, xmm1, xmm12, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
436 |
vaesdec(xmm1, xmm1, xmm13, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
437 |
vaesdec(xmm1, xmm1, xmm14, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
438 |
vaesdec(xmm1, xmm1, xmm15, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
439 |
vaesdec(xmm1, xmm1, xmm16, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
440 |
vaesdec(xmm1, xmm1, xmm17, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
441 |
vaesdec(xmm1, xmm1, xmm18, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
442 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
443 |
cmpl(rounds, 52); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
444 |
jcc(Assembler::below, LAST2); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
445 |
vaesdec(xmm1, xmm1, xmm19, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
446 |
vaesdec(xmm1, xmm1, xmm20, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
447 |
cmpl(rounds, 60); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
448 |
jcc(Assembler::below, LAST2); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
449 |
vaesdec(xmm1, xmm1, xmm21, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
450 |
vaesdec(xmm1, xmm1, xmm22, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
451 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
452 |
bind(LAST2); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
453 |
// Aesdeclast round |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
454 |
vaesdeclast(xmm1, xmm1, xmm2, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
455 |
// Write 16 bytes of PT to destination |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
456 |
movdqu(Address(dest_addr, pos, Address::times_1, 0), xmm1); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
457 |
addq(pos, 16); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
458 |
decq(rbx); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
459 |
jcc(Assembler::notEqual, LOOP2); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
460 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
461 |
bind(END); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
462 |
// Zero out the round keys |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
463 |
evpxorq(xmm8, xmm8, xmm8, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
464 |
evpxorq(xmm9, xmm9, xmm9, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
465 |
evpxorq(xmm10, xmm10, xmm10, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
466 |
evpxorq(xmm11, xmm11, xmm11, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
467 |
evpxorq(xmm12, xmm12, xmm12, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
468 |
evpxorq(xmm13, xmm13, xmm13, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
469 |
evpxorq(xmm14, xmm14, xmm14, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
470 |
evpxorq(xmm15, xmm15, xmm15, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
471 |
evpxorq(xmm16, xmm16, xmm16, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
472 |
evpxorq(xmm17, xmm17, xmm17, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
473 |
evpxorq(xmm18, xmm18, xmm18, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
474 |
evpxorq(xmm27, xmm27, xmm27, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
475 |
cmpl(rounds, 44); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
476 |
jcc(Assembler::belowEqual, EXIT); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
477 |
evpxorq(xmm19, xmm19, xmm19, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
478 |
evpxorq(xmm20, xmm20, xmm20, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
479 |
cmpl(rounds, 52); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
480 |
jcc(Assembler::belowEqual, EXIT); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
481 |
evpxorq(xmm21, xmm21, xmm21, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
482 |
evpxorq(xmm22, xmm22, xmm22, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
483 |
bind(EXIT); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
484 |
pop(rbx); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
485 |
pop(rax); // return length |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
486 |
pop(r12); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
487 |
pop(r13); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
488 |
} |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
489 |
|
52990 | 490 |
// Multiply 128 x 128 bits, using 4 pclmulqdq operations |
491 |
void MacroAssembler::schoolbookAAD(int i, Register htbl, XMMRegister data, |
|
492 |
XMMRegister tmp0, XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3) { |
|
493 |
movdqu(xmm15, Address(htbl, i * 16)); |
|
494 |
vpclmulhqlqdq(tmp3, data, xmm15); // 0x01 |
|
495 |
vpxor(tmp2, tmp2, tmp3, Assembler::AVX_128bit); |
|
496 |
vpclmulldq(tmp3, data, xmm15); // 0x00 |
|
497 |
vpxor(tmp0, tmp0, tmp3, Assembler::AVX_128bit); |
|
498 |
vpclmulhdq(tmp3, data, xmm15); // 0x11 |
|
499 |
vpxor(tmp1, tmp1, tmp3, Assembler::AVX_128bit); |
|
500 |
vpclmullqhqdq(tmp3, data, xmm15); // 0x10 |
|
501 |
vpxor(tmp2, tmp2, tmp3, Assembler::AVX_128bit); |
|
502 |
} |
|
503 |
||
504 |
// Multiply two 128 bit numbers resulting in a 256 bit value |
|
505 |
// Result of the multiplication followed by reduction stored in state |
|
506 |
void MacroAssembler::gfmul(XMMRegister tmp0, XMMRegister state) { |
|
507 |
const XMMRegister tmp1 = xmm4; |
|
508 |
const XMMRegister tmp2 = xmm5; |
|
509 |
const XMMRegister tmp3 = xmm6; |
|
510 |
const XMMRegister tmp4 = xmm7; |
|
511 |
||
512 |
vpclmulldq(tmp1, state, tmp0); //0x00 (a0 * b0) |
|
513 |
vpclmulhdq(tmp4, state, tmp0);//0x11 (a1 * b1) |
|
514 |
vpclmullqhqdq(tmp2, state, tmp0);//0x10 (a1 * b0) |
|
515 |
vpclmulhqlqdq(tmp3, state, tmp0); //0x01 (a0 * b1) |
|
516 |
||
517 |
vpxor(tmp2, tmp2, tmp3, Assembler::AVX_128bit); // (a0 * b1) + (a1 * b0) |
|
518 |
||
519 |
vpslldq(tmp3, tmp2, 8, Assembler::AVX_128bit); |
|
520 |
vpsrldq(tmp2, tmp2, 8, Assembler::AVX_128bit); |
|
521 |
vpxor(tmp1, tmp1, tmp3, Assembler::AVX_128bit); // tmp1 and tmp4 hold the result |
|
522 |
vpxor(tmp4, tmp4, tmp2, Assembler::AVX_128bit); // of carryless multiplication |
|
523 |
// Follows the reduction technique mentioned in |
|
524 |
// Shift-XOR reduction described in Gueron-Kounavis May 2010 |
|
525 |
// First phase of reduction |
|
526 |
// |
|
527 |
vpslld(xmm8, tmp1, 31, Assembler::AVX_128bit); // packed right shift shifting << 31 |
|
528 |
vpslld(xmm9, tmp1, 30, Assembler::AVX_128bit); // packed right shift shifting << 30 |
|
529 |
vpslld(xmm10, tmp1, 25, Assembler::AVX_128bit);// packed right shift shifting << 25 |
|
530 |
// xor the shifted versions |
|
531 |
vpxor(xmm8, xmm8, xmm9, Assembler::AVX_128bit); |
|
532 |
vpxor(xmm8, xmm8, xmm10, Assembler::AVX_128bit); |
|
533 |
vpslldq(xmm9, xmm8, 12, Assembler::AVX_128bit); |
|
534 |
vpsrldq(xmm8, xmm8, 4, Assembler::AVX_128bit); |
|
535 |
vpxor(tmp1, tmp1, xmm9, Assembler::AVX_128bit);// first phase of the reduction complete |
|
536 |
// |
|
537 |
// Second phase of the reduction |
|
538 |
// |
|
539 |
vpsrld(xmm9, tmp1, 1, Assembler::AVX_128bit);// packed left shifting >> 1 |
|
540 |
vpsrld(xmm10, tmp1, 2, Assembler::AVX_128bit);// packed left shifting >> 2 |
|
541 |
vpsrld(xmm11, tmp1, 7, Assembler::AVX_128bit);// packed left shifting >> 7 |
|
542 |
vpxor(xmm9, xmm9, xmm10, Assembler::AVX_128bit);// xor the shifted versions |
|
543 |
vpxor(xmm9, xmm9, xmm11, Assembler::AVX_128bit); |
|
544 |
vpxor(xmm9, xmm9, xmm8, Assembler::AVX_128bit); |
|
545 |
vpxor(tmp1, tmp1, xmm9, Assembler::AVX_128bit); |
|
546 |
vpxor(state, tmp4, tmp1, Assembler::AVX_128bit);// the result is in state |
|
547 |
ret(0); |
|
548 |
} |
|
549 |
||
550 |
// This method takes the subkey after expansion as input and generates 1 * 16 power of subkey H. |
|
551 |
// The power of H is used in reduction process for one block ghash |
|
552 |
void MacroAssembler::generateHtbl_one_block(Register htbl) { |
|
553 |
const XMMRegister t = xmm13; |
|
554 |
||
555 |
// load the original subkey hash |
|
556 |
movdqu(t, Address(htbl, 0)); |
|
557 |
// shuffle using long swap mask |
|
558 |
movdqu(xmm10, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr())); |
|
559 |
vpshufb(t, t, xmm10, Assembler::AVX_128bit); |
|
560 |
||
561 |
// Compute H' = GFMUL(H, 2) |
|
562 |
vpsrld(xmm3, t, 7, Assembler::AVX_128bit); |
|
563 |
movdqu(xmm4, ExternalAddress(StubRoutines::x86::ghash_shufflemask_addr())); |
|
564 |
vpshufb(xmm3, xmm3, xmm4, Assembler::AVX_128bit); |
|
565 |
movl(rax, 0xff00); |
|
566 |
movdl(xmm4, rax); |
|
567 |
vpshufb(xmm4, xmm4, xmm3, Assembler::AVX_128bit); |
|
568 |
movdqu(xmm5, ExternalAddress(StubRoutines::x86::ghash_polynomial_addr())); |
|
569 |
vpand(xmm5, xmm5, xmm4, Assembler::AVX_128bit); |
|
570 |
vpsrld(xmm3, t, 31, Assembler::AVX_128bit); |
|
571 |
vpslld(xmm4, t, 1, Assembler::AVX_128bit); |
|
572 |
vpslldq(xmm3, xmm3, 4, Assembler::AVX_128bit); |
|
573 |
vpxor(t, xmm4, xmm3, Assembler::AVX_128bit);// t holds p(x) <<1 or H * 2 |
|
574 |
||
575 |
//Adding p(x)<<1 to xmm5 which holds the reduction polynomial |
|
576 |
vpxor(t, t, xmm5, Assembler::AVX_128bit); |
|
577 |
movdqu(Address(htbl, 1 * 16), t); // H * 2 |
|
578 |
||
579 |
ret(0); |
|
580 |
} |
|
581 |
||
582 |
// This method takes the subkey after expansion as input and generates the remaining powers of subkey H. |
|
583 |
// The power of H is used in reduction process for eight block ghash |
|
584 |
void MacroAssembler::generateHtbl_eight_blocks(Register htbl) { |
|
585 |
const XMMRegister t = xmm13; |
|
586 |
const XMMRegister tmp0 = xmm1; |
|
587 |
Label GFMUL; |
|
588 |
||
589 |
movdqu(t, Address(htbl, 1 * 16)); |
|
590 |
movdqu(tmp0, t); |
|
591 |
||
592 |
// tmp0 and t hold H. Now we compute powers of H by using GFMUL(H, H) |
|
593 |
call(GFMUL, relocInfo::none); |
|
594 |
movdqu(Address(htbl, 2 * 16), t); //H ^ 2 * 2 |
|
595 |
call(GFMUL, relocInfo::none); |
|
596 |
movdqu(Address(htbl, 3 * 16), t); //H ^ 3 * 2 |
|
597 |
call(GFMUL, relocInfo::none); |
|
598 |
movdqu(Address(htbl, 4 * 16), t); //H ^ 4 * 2 |
|
599 |
call(GFMUL, relocInfo::none); |
|
600 |
movdqu(Address(htbl, 5 * 16), t); //H ^ 5 * 2 |
|
601 |
call(GFMUL, relocInfo::none); |
|
602 |
movdqu(Address(htbl, 6 * 16), t); //H ^ 6 * 2 |
|
603 |
call(GFMUL, relocInfo::none); |
|
604 |
movdqu(Address(htbl, 7 * 16), t); //H ^ 7 * 2 |
|
605 |
call(GFMUL, relocInfo::none); |
|
606 |
movdqu(Address(htbl, 8 * 16), t); //H ^ 8 * 2 |
|
607 |
ret(0); |
|
608 |
||
609 |
bind(GFMUL); |
|
610 |
gfmul(tmp0, t); |
|
611 |
} |
|
612 |
||
613 |
// Multiblock and single block GHASH computation using Shift XOR reduction technique |
|
614 |
void MacroAssembler::avx_ghash(Register input_state, Register htbl, |
|
615 |
Register input_data, Register blocks) { |
|
616 |
||
617 |
// temporary variables to hold input data and input state |
|
618 |
const XMMRegister data = xmm1; |
|
619 |
const XMMRegister state = xmm0; |
|
620 |
// temporary variables to hold intermediate results |
|
621 |
const XMMRegister tmp0 = xmm3; |
|
622 |
const XMMRegister tmp1 = xmm4; |
|
623 |
const XMMRegister tmp2 = xmm5; |
|
624 |
const XMMRegister tmp3 = xmm6; |
|
625 |
// temporary variables to hold byte and long swap masks |
|
626 |
const XMMRegister bswap_mask = xmm2; |
|
627 |
const XMMRegister lswap_mask = xmm14; |
|
628 |
||
629 |
Label GENERATE_HTBL_1_BLK, GENERATE_HTBL_8_BLKS, BEGIN_PROCESS, GFMUL, BLOCK8_REDUCTION, |
|
630 |
ONE_BLK_INIT, PROCESS_1_BLOCK, PROCESS_8_BLOCKS, SAVE_STATE, EXIT_GHASH; |
|
631 |
||
632 |
testptr(blocks, blocks); |
|
633 |
jcc(Assembler::zero, EXIT_GHASH); |
|
634 |
||
635 |
// Check if Hashtable (1*16) has been already generated |
|
636 |
// For anything less than 8 blocks, we generate only the first power of H. |
|
637 |
movdqu(tmp2, Address(htbl, 1 * 16)); |
|
638 |
ptest(tmp2, tmp2); |
|
639 |
jcc(Assembler::notZero, BEGIN_PROCESS); |
|
640 |
call(GENERATE_HTBL_1_BLK, relocInfo::none); |
|
641 |
||
642 |
// Shuffle the input state |
|
643 |
bind(BEGIN_PROCESS); |
|
644 |
movdqu(lswap_mask, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr())); |
|
645 |
movdqu(state, Address(input_state, 0)); |
|
646 |
vpshufb(state, state, lswap_mask, Assembler::AVX_128bit); |
|
647 |
||
648 |
cmpl(blocks, 8); |
|
649 |
jcc(Assembler::below, ONE_BLK_INIT); |
|
650 |
// If we have 8 blocks or more data, then generate remaining powers of H |
|
651 |
movdqu(tmp2, Address(htbl, 8 * 16)); |
|
652 |
ptest(tmp2, tmp2); |
|
653 |
jcc(Assembler::notZero, PROCESS_8_BLOCKS); |
|
654 |
call(GENERATE_HTBL_8_BLKS, relocInfo::none); |
|
655 |
||
656 |
//Do 8 multiplies followed by a reduction processing 8 blocks of data at a time |
|
657 |
//Each block = 16 bytes. |
|
658 |
bind(PROCESS_8_BLOCKS); |
|
659 |
subl(blocks, 8); |
|
660 |
movdqu(bswap_mask, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr())); |
|
661 |
movdqu(data, Address(input_data, 16 * 7)); |
|
662 |
vpshufb(data, data, bswap_mask, Assembler::AVX_128bit); |
|
663 |
//Loading 1*16 as calculated powers of H required starts at that location. |
|
664 |
movdqu(xmm15, Address(htbl, 1 * 16)); |
|
665 |
//Perform carryless multiplication of (H*2, data block #7) |
|
666 |
vpclmulhqlqdq(tmp2, data, xmm15);//a0 * b1 |
|
667 |
vpclmulldq(tmp0, data, xmm15);//a0 * b0 |
|
668 |
vpclmulhdq(tmp1, data, xmm15);//a1 * b1 |
|
669 |
vpclmullqhqdq(tmp3, data, xmm15);//a1* b0 |
|
670 |
vpxor(tmp2, tmp2, tmp3, Assembler::AVX_128bit);// (a0 * b1) + (a1 * b0) |
|
671 |
||
672 |
movdqu(data, Address(input_data, 16 * 6)); |
|
673 |
vpshufb(data, data, bswap_mask, Assembler::AVX_128bit); |
|
674 |
// Perform carryless multiplication of (H^2 * 2, data block #6) |
|
675 |
schoolbookAAD(2, htbl, data, tmp0, tmp1, tmp2, tmp3); |
|
676 |
||
677 |
movdqu(data, Address(input_data, 16 * 5)); |
|
678 |
vpshufb(data, data, bswap_mask, Assembler::AVX_128bit); |
|
679 |
// Perform carryless multiplication of (H^3 * 2, data block #5) |
|
680 |
schoolbookAAD(3, htbl, data, tmp0, tmp1, tmp2, tmp3); |
|
681 |
movdqu(data, Address(input_data, 16 * 4)); |
|
682 |
vpshufb(data, data, bswap_mask, Assembler::AVX_128bit); |
|
683 |
// Perform carryless multiplication of (H^4 * 2, data block #4) |
|
684 |
schoolbookAAD(4, htbl, data, tmp0, tmp1, tmp2, tmp3); |
|
685 |
movdqu(data, Address(input_data, 16 * 3)); |
|
686 |
vpshufb(data, data, bswap_mask, Assembler::AVX_128bit); |
|
687 |
// Perform carryless multiplication of (H^5 * 2, data block #3) |
|
688 |
schoolbookAAD(5, htbl, data, tmp0, tmp1, tmp2, tmp3); |
|
689 |
movdqu(data, Address(input_data, 16 * 2)); |
|
690 |
vpshufb(data, data, bswap_mask, Assembler::AVX_128bit); |
|
691 |
// Perform carryless multiplication of (H^6 * 2, data block #2) |
|
692 |
schoolbookAAD(6, htbl, data, tmp0, tmp1, tmp2, tmp3); |
|
693 |
movdqu(data, Address(input_data, 16 * 1)); |
|
694 |
vpshufb(data, data, bswap_mask, Assembler::AVX_128bit); |
|
695 |
// Perform carryless multiplication of (H^7 * 2, data block #1) |
|
696 |
schoolbookAAD(7, htbl, data, tmp0, tmp1, tmp2, tmp3); |
|
697 |
movdqu(data, Address(input_data, 16 * 0)); |
|
698 |
// xor data block#0 with input state before perfoming carry-less multiplication |
|
699 |
vpshufb(data, data, bswap_mask, Assembler::AVX_128bit); |
|
700 |
vpxor(data, data, state, Assembler::AVX_128bit); |
|
701 |
// Perform carryless multiplication of (H^8 * 2, data block #0) |
|
702 |
schoolbookAAD(8, htbl, data, tmp0, tmp1, tmp2, tmp3); |
|
703 |
vpslldq(tmp3, tmp2, 8, Assembler::AVX_128bit); |
|
704 |
vpsrldq(tmp2, tmp2, 8, Assembler::AVX_128bit); |
|
705 |
vpxor(tmp0, tmp0, tmp3, Assembler::AVX_128bit);// tmp0, tmp1 contains aggregated results of |
|
706 |
vpxor(tmp1, tmp1, tmp2, Assembler::AVX_128bit);// the multiplication operation |
|
707 |
||
708 |
// we have the 2 128-bit partially accumulated multiplication results in tmp0:tmp1 |
|
709 |
// with higher 128-bit in tmp1 and lower 128-bit in corresponding tmp0 |
|
710 |
// Follows the reduction technique mentioned in |
|
711 |
// Shift-XOR reduction described in Gueron-Kounavis May 2010 |
|
712 |
bind(BLOCK8_REDUCTION); |
|
713 |
// First Phase of the reduction |
|
714 |
vpslld(xmm8, tmp0, 31, Assembler::AVX_128bit); // packed right shifting << 31 |
|
715 |
vpslld(xmm9, tmp0, 30, Assembler::AVX_128bit); // packed right shifting << 30 |
|
716 |
vpslld(xmm10, tmp0, 25, Assembler::AVX_128bit); // packed right shifting << 25 |
|
717 |
// xor the shifted versions |
|
718 |
vpxor(xmm8, xmm8, xmm10, Assembler::AVX_128bit); |
|
719 |
vpxor(xmm8, xmm8, xmm9, Assembler::AVX_128bit); |
|
720 |
||
721 |
vpslldq(xmm9, xmm8, 12, Assembler::AVX_128bit); |
|
722 |
vpsrldq(xmm8, xmm8, 4, Assembler::AVX_128bit); |
|
723 |
||
724 |
vpxor(tmp0, tmp0, xmm9, Assembler::AVX_128bit); // first phase of reduction is complete |
|
725 |
// second phase of the reduction |
|
726 |
vpsrld(xmm9, tmp0, 1, Assembler::AVX_128bit); // packed left shifting >> 1 |
|
727 |
vpsrld(xmm10, tmp0, 2, Assembler::AVX_128bit); // packed left shifting >> 2 |
|
728 |
vpsrld(tmp2, tmp0, 7, Assembler::AVX_128bit); // packed left shifting >> 7 |
|
729 |
// xor the shifted versions |
|
730 |
vpxor(xmm9, xmm9, xmm10, Assembler::AVX_128bit); |
|
731 |
vpxor(xmm9, xmm9, tmp2, Assembler::AVX_128bit); |
|
732 |
vpxor(xmm9, xmm9, xmm8, Assembler::AVX_128bit); |
|
733 |
vpxor(tmp0, xmm9, tmp0, Assembler::AVX_128bit); |
|
734 |
// Final result is in state |
|
735 |
vpxor(state, tmp0, tmp1, Assembler::AVX_128bit); |
|
736 |
||
737 |
lea(input_data, Address(input_data, 16 * 8)); |
|
738 |
cmpl(blocks, 8); |
|
739 |
jcc(Assembler::below, ONE_BLK_INIT); |
|
740 |
jmp(PROCESS_8_BLOCKS); |
|
741 |
||
742 |
// Since this is one block operation we will only use H * 2 i.e. the first power of H |
|
743 |
bind(ONE_BLK_INIT); |
|
744 |
movdqu(tmp0, Address(htbl, 1 * 16)); |
|
745 |
movdqu(bswap_mask, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr())); |
|
746 |
||
747 |
//Do one (128 bit x 128 bit) carry-less multiplication at a time followed by a reduction. |
|
748 |
bind(PROCESS_1_BLOCK); |
|
749 |
cmpl(blocks, 0); |
|
750 |
jcc(Assembler::equal, SAVE_STATE); |
|
751 |
subl(blocks, 1); |
|
752 |
movdqu(data, Address(input_data, 0)); |
|
753 |
vpshufb(data, data, bswap_mask, Assembler::AVX_128bit); |
|
754 |
vpxor(state, state, data, Assembler::AVX_128bit); |
|
755 |
// gfmul(H*2, state) |
|
756 |
call(GFMUL, relocInfo::none); |
|
757 |
addptr(input_data, 16); |
|
758 |
jmp(PROCESS_1_BLOCK); |
|
759 |
||
760 |
bind(SAVE_STATE); |
|
761 |
vpshufb(state, state, lswap_mask, Assembler::AVX_128bit); |
|
762 |
movdqu(Address(input_state, 0), state); |
|
763 |
jmp(EXIT_GHASH); |
|
764 |
||
765 |
bind(GFMUL); |
|
766 |
gfmul(tmp0, state); |
|
767 |
||
768 |
bind(GENERATE_HTBL_1_BLK); |
|
769 |
generateHtbl_one_block(htbl); |
|
770 |
||
771 |
bind(GENERATE_HTBL_8_BLKS); |
|
772 |
generateHtbl_eight_blocks(htbl); |
|
773 |
||
774 |
bind(EXIT_GHASH); |
|
775 |
// zero out xmm registers used for Htbl storage |
|
776 |
vpxor(xmm0, xmm0, xmm0, Assembler::AVX_128bit); |
|
777 |
vpxor(xmm1, xmm1, xmm1, Assembler::AVX_128bit); |
|
778 |
vpxor(xmm3, xmm3, xmm3, Assembler::AVX_128bit); |
|
779 |
vpxor(xmm15, xmm15, xmm15, Assembler::AVX_128bit); |
|
53017
e10a1f7aaa13
8215354: x86_32 build failures after JDK-8214074 (Ghash optimization using AVX instructions)
shade
parents:
52990
diff
changeset
|
780 |
} |
58977
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
781 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
782 |
// AES Counter Mode using VAES instructions |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
783 |
void MacroAssembler::aesctr_encrypt(Register src_addr, Register dest_addr, Register key, Register counter, |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
784 |
Register len_reg, Register used, Register used_addr, Register saved_encCounter_start) { |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
785 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
786 |
const Register rounds = 0; |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
787 |
const Register pos = r12; |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
788 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
789 |
Label PRELOOP_START, EXIT_PRELOOP, REMAINDER, REMAINDER_16, LOOP, END, EXIT, END_LOOP, |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
790 |
AES192, AES256, AES192_REMAINDER16, REMAINDER16_END_LOOP, AES256_REMAINDER16, |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
791 |
REMAINDER_8, REMAINDER_4, AES192_REMAINDER8, REMAINDER_LOOP, AES256_REMINDER, |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
792 |
AES192_REMAINDER, END_REMAINDER_LOOP, AES256_REMAINDER8, REMAINDER8_END_LOOP, |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
793 |
AES192_REMAINDER4, AES256_REMAINDER4, AES256_REMAINDER, END_REMAINDER4, EXTRACT_TAILBYTES, |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
794 |
EXTRACT_TAIL_4BYTES, EXTRACT_TAIL_2BYTES, EXTRACT_TAIL_1BYTE, STORE_CTR; |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
795 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
796 |
cmpl(len_reg, 0); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
797 |
jcc(Assembler::belowEqual, EXIT); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
798 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
799 |
movl(pos, 0); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
800 |
// if the number of used encrypted counter bytes < 16, |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
801 |
// XOR PT with saved encrypted counter to obtain CT |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
802 |
bind(PRELOOP_START); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
803 |
cmpl(used, 16); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
804 |
jcc(Assembler::aboveEqual, EXIT_PRELOOP); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
805 |
movb(rbx, Address(saved_encCounter_start, used)); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
806 |
xorb(rbx, Address(src_addr, pos)); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
807 |
movb(Address(dest_addr, pos), rbx); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
808 |
addptr(pos, 1); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
809 |
addptr(used, 1); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
810 |
decrement(len_reg); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
811 |
jmp(PRELOOP_START); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
812 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
813 |
bind(EXIT_PRELOOP); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
814 |
movl(Address(used_addr, 0), used); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
815 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
816 |
// Calculate number of rounds i.e. 10, 12, 14, based on key length(128, 192, 256). |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
817 |
movl(rounds, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
818 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
819 |
vpxor(xmm0, xmm0, xmm0, Assembler::AVX_128bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
820 |
// Move initial counter value in xmm0 |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
821 |
movdqu(xmm0, Address(counter, 0)); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
822 |
// broadcast counter value to zmm8 |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
823 |
evshufi64x2(xmm8, xmm0, xmm0, 0, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
824 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
825 |
// load lbswap mask |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
826 |
evmovdquq(xmm16, ExternalAddress(StubRoutines::x86::counter_mask_addr()), Assembler::AVX_512bit, r15); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
827 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
828 |
//shuffle counter using lbswap_mask |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
829 |
vpshufb(xmm8, xmm8, xmm16, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
830 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
831 |
// pre-increment and propagate counter values to zmm9-zmm15 registers. |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
832 |
// Linc0 increments the zmm8 by 1 (initial value being 0), Linc4 increments the counters zmm9-zmm15 by 4 |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
833 |
// The counter is incremented after each block i.e. 16 bytes is processed; |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
834 |
// each zmm register has 4 counter values as its MSB |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
835 |
// the counters are incremented in parallel |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
836 |
vpaddd(xmm8, xmm8, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 64), Assembler::AVX_512bit, r15);//linc0 |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
837 |
vpaddd(xmm9, xmm8, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 128), Assembler::AVX_512bit, r15);//linc4(rip) |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
838 |
vpaddd(xmm10, xmm9, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 128), Assembler::AVX_512bit, r15);//Linc4(rip) |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
839 |
vpaddd(xmm11, xmm10, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 128), Assembler::AVX_512bit, r15);//Linc4(rip) |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
840 |
vpaddd(xmm12, xmm11, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 128), Assembler::AVX_512bit, r15);//Linc4(rip) |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
841 |
vpaddd(xmm13, xmm12, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 128), Assembler::AVX_512bit, r15);//Linc4(rip) |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
842 |
vpaddd(xmm14, xmm13, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 128), Assembler::AVX_512bit, r15);//Linc4(rip) |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
843 |
vpaddd(xmm15, xmm14, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 128), Assembler::AVX_512bit, r15);//Linc4(rip) |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
844 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
845 |
// load linc32 mask in zmm register. linc32 increments counter by 32 |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
846 |
evmovdquq(xmm19, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 256), Assembler::AVX_512bit, r15);//Linc32 |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
847 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
848 |
// xmm31 contains the key shuffle mask. |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
849 |
movdqu(xmm31, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
850 |
// Load key function loads 128 bit key and shuffles it. Then we broadcast the shuffled key to convert it into a 512 bit value. |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
851 |
// For broadcasting the values to ZMM, vshufi64 is used instead of evbroadcasti64x2 as the source in this case is ZMM register |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
852 |
// that holds shuffled key value. |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
853 |
ev_load_key(xmm20, key, 0, xmm31); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
854 |
ev_load_key(xmm21, key, 1 * 16, xmm31); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
855 |
ev_load_key(xmm22, key, 2 * 16, xmm31); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
856 |
ev_load_key(xmm23, key, 3 * 16, xmm31); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
857 |
ev_load_key(xmm24, key, 4 * 16, xmm31); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
858 |
ev_load_key(xmm25, key, 5 * 16, xmm31); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
859 |
ev_load_key(xmm26, key, 6 * 16, xmm31); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
860 |
ev_load_key(xmm27, key, 7 * 16, xmm31); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
861 |
ev_load_key(xmm28, key, 8 * 16, xmm31); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
862 |
ev_load_key(xmm29, key, 9 * 16, xmm31); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
863 |
ev_load_key(xmm30, key, 10 * 16, xmm31); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
864 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
865 |
// Process 32 blocks or 512 bytes of data |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
866 |
bind(LOOP); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
867 |
cmpl(len_reg, 512); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
868 |
jcc(Assembler::less, REMAINDER); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
869 |
subq(len_reg, 512); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
870 |
//Shuffle counter and Exor it with roundkey1. Result is stored in zmm0-7 |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
871 |
vpshufb(xmm0, xmm8, xmm16, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
872 |
evpxorq(xmm0, xmm0, xmm20, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
873 |
vpshufb(xmm1, xmm9, xmm16, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
874 |
evpxorq(xmm1, xmm1, xmm20, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
875 |
vpshufb(xmm2, xmm10, xmm16, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
876 |
evpxorq(xmm2, xmm2, xmm20, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
877 |
vpshufb(xmm3, xmm11, xmm16, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
878 |
evpxorq(xmm3, xmm3, xmm20, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
879 |
vpshufb(xmm4, xmm12, xmm16, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
880 |
evpxorq(xmm4, xmm4, xmm20, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
881 |
vpshufb(xmm5, xmm13, xmm16, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
882 |
evpxorq(xmm5, xmm5, xmm20, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
883 |
vpshufb(xmm6, xmm14, xmm16, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
884 |
evpxorq(xmm6, xmm6, xmm20, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
885 |
vpshufb(xmm7, xmm15, xmm16, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
886 |
evpxorq(xmm7, xmm7, xmm20, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
887 |
// Perform AES encode operations and put results in zmm0-zmm7. |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
888 |
// This is followed by incrementing counter values in zmm8-zmm15. |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
889 |
// Since we will be processing 32 blocks at a time, the counter is incremented by 32. |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
890 |
roundEnc(xmm21, 7); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
891 |
vpaddq(xmm8, xmm8, xmm19, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
892 |
roundEnc(xmm22, 7); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
893 |
vpaddq(xmm9, xmm9, xmm19, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
894 |
roundEnc(xmm23, 7); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
895 |
vpaddq(xmm10, xmm10, xmm19, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
896 |
roundEnc(xmm24, 7); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
897 |
vpaddq(xmm11, xmm11, xmm19, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
898 |
roundEnc(xmm25, 7); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
899 |
vpaddq(xmm12, xmm12, xmm19, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
900 |
roundEnc(xmm26, 7); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
901 |
vpaddq(xmm13, xmm13, xmm19, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
902 |
roundEnc(xmm27, 7); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
903 |
vpaddq(xmm14, xmm14, xmm19, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
904 |
roundEnc(xmm28, 7); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
905 |
vpaddq(xmm15, xmm15, xmm19, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
906 |
roundEnc(xmm29, 7); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
907 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
908 |
cmpl(rounds, 52); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
909 |
jcc(Assembler::aboveEqual, AES192); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
910 |
lastroundEnc(xmm30, 7); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
911 |
jmp(END_LOOP); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
912 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
913 |
bind(AES192); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
914 |
roundEnc(xmm30, 7); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
915 |
ev_load_key(xmm18, key, 11 * 16, xmm31); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
916 |
roundEnc(xmm18, 7); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
917 |
cmpl(rounds, 60); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
918 |
jcc(Assembler::aboveEqual, AES256); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
919 |
ev_load_key(xmm18, key, 12 * 16, xmm31); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
920 |
lastroundEnc(xmm18, 7); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
921 |
jmp(END_LOOP); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
922 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
923 |
bind(AES256); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
924 |
ev_load_key(xmm18, key, 12 * 16, xmm31); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
925 |
roundEnc(xmm18, 7); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
926 |
ev_load_key(xmm18, key, 13 * 16, xmm31); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
927 |
roundEnc(xmm18, 7); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
928 |
ev_load_key(xmm18, key, 14 * 16, xmm31); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
929 |
lastroundEnc(xmm18, 7); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
930 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
931 |
// After AES encode rounds, the encrypted block cipher lies in zmm0-zmm7 |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
932 |
// xor encrypted block cipher and input plaintext and store resultant ciphertext |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
933 |
bind(END_LOOP); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
934 |
evpxorq(xmm0, xmm0, Address(src_addr, pos, Address::times_1, 0 * 64), Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
935 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 0), xmm0, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
936 |
evpxorq(xmm1, xmm1, Address(src_addr, pos, Address::times_1, 1 * 64), Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
937 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 64), xmm1, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
938 |
evpxorq(xmm2, xmm2, Address(src_addr, pos, Address::times_1, 2 * 64), Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
939 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 2 * 64), xmm2, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
940 |
evpxorq(xmm3, xmm3, Address(src_addr, pos, Address::times_1, 3 * 64), Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
941 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 3 * 64), xmm3, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
942 |
evpxorq(xmm4, xmm4, Address(src_addr, pos, Address::times_1, 4 * 64), Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
943 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 4 * 64), xmm4, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
944 |
evpxorq(xmm5, xmm5, Address(src_addr, pos, Address::times_1, 5 * 64), Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
945 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 5 * 64), xmm5, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
946 |
evpxorq(xmm6, xmm6, Address(src_addr, pos, Address::times_1, 6 * 64), Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
947 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 6 * 64), xmm6, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
948 |
evpxorq(xmm7, xmm7, Address(src_addr, pos, Address::times_1, 7 * 64), Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
949 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 7 * 64), xmm7, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
950 |
addq(pos, 512); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
951 |
jmp(LOOP); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
952 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
953 |
// Encode 256, 128, 64 or 16 bytes at a time if length is less than 512 bytes |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
954 |
bind(REMAINDER); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
955 |
cmpl(len_reg, 0); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
956 |
jcc(Assembler::equal, END); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
957 |
cmpl(len_reg, 256); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
958 |
jcc(Assembler::aboveEqual, REMAINDER_16); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
959 |
cmpl(len_reg, 128); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
960 |
jcc(Assembler::aboveEqual, REMAINDER_8); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
961 |
cmpl(len_reg, 64); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
962 |
jcc(Assembler::aboveEqual, REMAINDER_4); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
963 |
// At this point, we will process 16 bytes of data at a time. |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
964 |
// So load xmm19 with counter increment value as 1 |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
965 |
evmovdquq(xmm19, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 80), Assembler::AVX_128bit, r15); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
966 |
jmp(REMAINDER_LOOP); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
967 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
968 |
// Each ZMM register can be used to encode 64 bytes of data, so we have 4 ZMM registers to encode 256 bytes of data |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
969 |
bind(REMAINDER_16); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
970 |
subq(len_reg, 256); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
971 |
// As we process 16 blocks at a time, load mask for incrementing the counter value by 16 |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
972 |
evmovdquq(xmm19, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 320), Assembler::AVX_512bit, r15);//Linc16(rip) |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
973 |
// shuffle counter and XOR counter with roundkey1 |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
974 |
vpshufb(xmm0, xmm8, xmm16, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
975 |
evpxorq(xmm0, xmm0, xmm20, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
976 |
vpshufb(xmm1, xmm9, xmm16, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
977 |
evpxorq(xmm1, xmm1, xmm20, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
978 |
vpshufb(xmm2, xmm10, xmm16, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
979 |
evpxorq(xmm2, xmm2, xmm20, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
980 |
vpshufb(xmm3, xmm11, xmm16, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
981 |
evpxorq(xmm3, xmm3, xmm20, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
982 |
// Increment counter values by 16 |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
983 |
vpaddq(xmm8, xmm8, xmm19, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
984 |
vpaddq(xmm9, xmm9, xmm19, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
985 |
// AES encode rounds |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
986 |
roundEnc(xmm21, 3); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
987 |
roundEnc(xmm22, 3); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
988 |
roundEnc(xmm23, 3); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
989 |
roundEnc(xmm24, 3); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
990 |
roundEnc(xmm25, 3); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
991 |
roundEnc(xmm26, 3); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
992 |
roundEnc(xmm27, 3); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
993 |
roundEnc(xmm28, 3); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
994 |
roundEnc(xmm29, 3); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
995 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
996 |
cmpl(rounds, 52); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
997 |
jcc(Assembler::aboveEqual, AES192_REMAINDER16); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
998 |
lastroundEnc(xmm30, 3); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
999 |
jmp(REMAINDER16_END_LOOP); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1000 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1001 |
bind(AES192_REMAINDER16); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1002 |
roundEnc(xmm30, 3); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1003 |
ev_load_key(xmm18, key, 11 * 16, xmm31); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1004 |
roundEnc(xmm18, 3); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1005 |
ev_load_key(xmm5, key, 12 * 16, xmm31); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1006 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1007 |
cmpl(rounds, 60); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1008 |
jcc(Assembler::aboveEqual, AES256_REMAINDER16); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1009 |
lastroundEnc(xmm5, 3); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1010 |
jmp(REMAINDER16_END_LOOP); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1011 |
bind(AES256_REMAINDER16); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1012 |
roundEnc(xmm5, 3); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1013 |
ev_load_key(xmm6, key, 13 * 16, xmm31); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1014 |
roundEnc(xmm6, 3); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1015 |
ev_load_key(xmm7, key, 14 * 16, xmm31); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1016 |
lastroundEnc(xmm7, 3); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1017 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1018 |
// After AES encode rounds, the encrypted block cipher lies in zmm0-zmm3 |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1019 |
// xor 256 bytes of PT with the encrypted counters to produce CT. |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1020 |
bind(REMAINDER16_END_LOOP); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1021 |
evpxorq(xmm0, xmm0, Address(src_addr, pos, Address::times_1, 0), Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1022 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 0), xmm0, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1023 |
evpxorq(xmm1, xmm1, Address(src_addr, pos, Address::times_1, 1 * 64), Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1024 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 1 * 64), xmm1, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1025 |
evpxorq(xmm2, xmm2, Address(src_addr, pos, Address::times_1, 2 * 64), Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1026 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 2 * 64), xmm2, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1027 |
evpxorq(xmm3, xmm3, Address(src_addr, pos, Address::times_1, 3 * 64), Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1028 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 3 * 64), xmm3, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1029 |
addq(pos, 256); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1030 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1031 |
cmpl(len_reg, 128); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1032 |
jcc(Assembler::aboveEqual, REMAINDER_8); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1033 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1034 |
cmpl(len_reg, 64); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1035 |
jcc(Assembler::aboveEqual, REMAINDER_4); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1036 |
// load mask for incrementing the counter value by 1 |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1037 |
evmovdquq(xmm19, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 80), Assembler::AVX_128bit, r15);//Linc0 + 16(rip) |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1038 |
jmp(REMAINDER_LOOP); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1039 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1040 |
// Each ZMM register can be used to encode 64 bytes of data, so we have 2 ZMM registers to encode 128 bytes of data |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1041 |
bind(REMAINDER_8); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1042 |
subq(len_reg, 128); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1043 |
// As we process 8 blocks at a time, load mask for incrementing the counter value by 8 |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1044 |
evmovdquq(xmm19, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 192), Assembler::AVX_512bit, r15);//Linc8(rip) |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1045 |
// shuffle counters and xor with roundkey1 |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1046 |
vpshufb(xmm0, xmm8, xmm16, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1047 |
evpxorq(xmm0, xmm0, xmm20, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1048 |
vpshufb(xmm1, xmm9, xmm16, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1049 |
evpxorq(xmm1, xmm1, xmm20, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1050 |
// increment counter by 8 |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1051 |
vpaddq(xmm8, xmm8, xmm19, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1052 |
// AES encode |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1053 |
roundEnc(xmm21, 1); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1054 |
roundEnc(xmm22, 1); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1055 |
roundEnc(xmm23, 1); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1056 |
roundEnc(xmm24, 1); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1057 |
roundEnc(xmm25, 1); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1058 |
roundEnc(xmm26, 1); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1059 |
roundEnc(xmm27, 1); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1060 |
roundEnc(xmm28, 1); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1061 |
roundEnc(xmm29, 1); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1062 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1063 |
cmpl(rounds, 52); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1064 |
jcc(Assembler::aboveEqual, AES192_REMAINDER8); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1065 |
lastroundEnc(xmm30, 1); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1066 |
jmp(REMAINDER8_END_LOOP); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1067 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1068 |
bind(AES192_REMAINDER8); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1069 |
roundEnc(xmm30, 1); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1070 |
ev_load_key(xmm18, key, 11 * 16, xmm31); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1071 |
roundEnc(xmm18, 1); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1072 |
ev_load_key(xmm5, key, 12 * 16, xmm31); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1073 |
cmpl(rounds, 60); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1074 |
jcc(Assembler::aboveEqual, AES256_REMAINDER8); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1075 |
lastroundEnc(xmm5, 1); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1076 |
jmp(REMAINDER8_END_LOOP); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1077 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1078 |
bind(AES256_REMAINDER8); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1079 |
roundEnc(xmm5, 1); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1080 |
ev_load_key(xmm6, key, 13 * 16, xmm31); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1081 |
roundEnc(xmm6, 1); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1082 |
ev_load_key(xmm7, key, 14 * 16, xmm31); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1083 |
lastroundEnc(xmm7, 1); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1084 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1085 |
bind(REMAINDER8_END_LOOP); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1086 |
// After AES encode rounds, the encrypted block cipher lies in zmm0-zmm1 |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1087 |
// XOR PT with the encrypted counter and store as CT |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1088 |
evpxorq(xmm0, xmm0, Address(src_addr, pos, Address::times_1, 0 * 64), Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1089 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 0 * 64), xmm0, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1090 |
evpxorq(xmm1, xmm1, Address(src_addr, pos, Address::times_1, 1 * 64), Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1091 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 1 * 64), xmm1, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1092 |
addq(pos, 128); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1093 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1094 |
cmpl(len_reg, 64); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1095 |
jcc(Assembler::aboveEqual, REMAINDER_4); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1096 |
// load mask for incrementing the counter value by 1 |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1097 |
evmovdquq(xmm19, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 80), Assembler::AVX_128bit, r15);//Linc0 + 16(rip) |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1098 |
jmp(REMAINDER_LOOP); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1099 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1100 |
// Each ZMM register can be used to encode 64 bytes of data, so we have 1 ZMM register used in this block of code |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1101 |
bind(REMAINDER_4); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1102 |
subq(len_reg, 64); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1103 |
// As we process 4 blocks at a time, load mask for incrementing the counter value by 4 |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1104 |
evmovdquq(xmm19, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 128), Assembler::AVX_512bit, r15);//Linc4(rip) |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1105 |
// XOR counter with first roundkey |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1106 |
vpshufb(xmm0, xmm8, xmm16, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1107 |
evpxorq(xmm0, xmm0, xmm20, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1108 |
// Increment counter |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1109 |
vpaddq(xmm8, xmm8, xmm19, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1110 |
vaesenc(xmm0, xmm0, xmm21, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1111 |
vaesenc(xmm0, xmm0, xmm22, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1112 |
vaesenc(xmm0, xmm0, xmm23, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1113 |
vaesenc(xmm0, xmm0, xmm24, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1114 |
vaesenc(xmm0, xmm0, xmm25, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1115 |
vaesenc(xmm0, xmm0, xmm26, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1116 |
vaesenc(xmm0, xmm0, xmm27, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1117 |
vaesenc(xmm0, xmm0, xmm28, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1118 |
vaesenc(xmm0, xmm0, xmm29, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1119 |
cmpl(rounds, 52); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1120 |
jcc(Assembler::aboveEqual, AES192_REMAINDER4); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1121 |
vaesenclast(xmm0, xmm0, xmm30, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1122 |
jmp(END_REMAINDER4); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1123 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1124 |
bind(AES192_REMAINDER4); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1125 |
vaesenc(xmm0, xmm0, xmm30, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1126 |
ev_load_key(xmm18, key, 11 * 16, xmm31); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1127 |
vaesenc(xmm0, xmm0, xmm18, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1128 |
ev_load_key(xmm5, key, 12 * 16, xmm31); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1129 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1130 |
cmpl(rounds, 60); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1131 |
jcc(Assembler::aboveEqual, AES256_REMAINDER4); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1132 |
vaesenclast(xmm0, xmm0, xmm5, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1133 |
jmp(END_REMAINDER4); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1134 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1135 |
bind(AES256_REMAINDER4); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1136 |
vaesenc(xmm0, xmm0, xmm5, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1137 |
ev_load_key(xmm6, key, 13 * 16, xmm31); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1138 |
vaesenc(xmm0, xmm0, xmm6, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1139 |
ev_load_key(xmm7, key, 14 * 16, xmm31); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1140 |
vaesenclast(xmm0, xmm0, xmm7, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1141 |
// After AES encode rounds, the encrypted block cipher lies in zmm0. |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1142 |
// XOR encrypted block cipher with PT and store 64 bytes of ciphertext |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1143 |
bind(END_REMAINDER4); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1144 |
evpxorq(xmm0, xmm0, Address(src_addr, pos, Address::times_1, 0 * 64), Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1145 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 0), xmm0, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1146 |
addq(pos, 64); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1147 |
// load mask for incrementing the counter value by 1 |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1148 |
evmovdquq(xmm19, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 80), Assembler::AVX_128bit, r15);//Linc0 + 16(rip) |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1149 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1150 |
// For a single block, the AES rounds start here. |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1151 |
bind(REMAINDER_LOOP); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1152 |
cmpl(len_reg, 0); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1153 |
jcc(Assembler::belowEqual, END); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1154 |
// XOR counter with first roundkey |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1155 |
vpshufb(xmm0, xmm8, xmm16, Assembler::AVX_128bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1156 |
evpxorq(xmm0, xmm0, xmm20, Assembler::AVX_128bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1157 |
vaesenc(xmm0, xmm0, xmm21, Assembler::AVX_128bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1158 |
// Increment counter by 1 |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1159 |
vpaddq(xmm8, xmm8, xmm19, Assembler::AVX_128bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1160 |
vaesenc(xmm0, xmm0, xmm22, Assembler::AVX_128bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1161 |
vaesenc(xmm0, xmm0, xmm23, Assembler::AVX_128bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1162 |
vaesenc(xmm0, xmm0, xmm24, Assembler::AVX_128bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1163 |
vaesenc(xmm0, xmm0, xmm25, Assembler::AVX_128bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1164 |
vaesenc(xmm0, xmm0, xmm26, Assembler::AVX_128bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1165 |
vaesenc(xmm0, xmm0, xmm27, Assembler::AVX_128bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1166 |
vaesenc(xmm0, xmm0, xmm28, Assembler::AVX_128bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1167 |
vaesenc(xmm0, xmm0, xmm29, Assembler::AVX_128bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1168 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1169 |
cmpl(rounds, 52); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1170 |
jcc(Assembler::aboveEqual, AES192_REMAINDER); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1171 |
vaesenclast(xmm0, xmm0, xmm30, Assembler::AVX_128bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1172 |
jmp(END_REMAINDER_LOOP); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1173 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1174 |
bind(AES192_REMAINDER); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1175 |
vaesenc(xmm0, xmm0, xmm30, Assembler::AVX_128bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1176 |
ev_load_key(xmm18, key, 11 * 16, xmm31); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1177 |
vaesenc(xmm0, xmm0, xmm18, Assembler::AVX_128bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1178 |
ev_load_key(xmm5, key, 12 * 16, xmm31); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1179 |
cmpl(rounds, 60); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1180 |
jcc(Assembler::aboveEqual, AES256_REMAINDER); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1181 |
vaesenclast(xmm0, xmm0, xmm5, Assembler::AVX_128bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1182 |
jmp(END_REMAINDER_LOOP); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1183 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1184 |
bind(AES256_REMAINDER); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1185 |
vaesenc(xmm0, xmm0, xmm5, Assembler::AVX_128bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1186 |
ev_load_key(xmm6, key, 13 * 16, xmm31); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1187 |
vaesenc(xmm0, xmm0, xmm6, Assembler::AVX_128bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1188 |
ev_load_key(xmm7, key, 14 * 16, xmm31); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1189 |
vaesenclast(xmm0, xmm0, xmm7, Assembler::AVX_128bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1190 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1191 |
bind(END_REMAINDER_LOOP); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1192 |
// If the length register is less than the blockSize i.e. 16 |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1193 |
// then we store only those bytes of the CT to the destination |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1194 |
// corresponding to the length register value |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1195 |
// extracting the exact number of bytes is handled by EXTRACT_TAILBYTES |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1196 |
cmpl(len_reg, 16); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1197 |
jcc(Assembler::less, EXTRACT_TAILBYTES); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1198 |
subl(len_reg, 16); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1199 |
// After AES encode rounds, the encrypted block cipher lies in xmm0. |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1200 |
// If the length register is at least 16 bytes, store CT in dest after XOR operation. |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1201 |
evpxorq(xmm0, xmm0, Address(src_addr, pos, Address::times_1, 0), Assembler::AVX_128bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1202 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 0), xmm0, Assembler::AVX_128bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1203 |
addl(pos, 16); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1204 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1205 |
jmp(REMAINDER_LOOP); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1206 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1207 |
bind(EXTRACT_TAILBYTES); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1208 |
// Save encrypted counter value in xmm0 for next invocation, before XOR operation |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1209 |
movdqu(Address(saved_encCounter_start, 0), xmm0); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1210 |
// XOR encrypted block cipher in xmm0 with PT to produce CT |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1211 |
evpxorq(xmm0, xmm0, Address(src_addr, pos, Address::times_1, 0), Assembler::AVX_128bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1212 |
// extract up to 15 bytes of CT from xmm0 as specified by length register |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1213 |
testptr(len_reg, 8); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1214 |
jcc(Assembler::zero, EXTRACT_TAIL_4BYTES); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1215 |
pextrq(Address(dest_addr, pos), xmm0, 0); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1216 |
psrldq(xmm0, 8); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1217 |
addl(pos, 8); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1218 |
bind(EXTRACT_TAIL_4BYTES); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1219 |
testptr(len_reg, 4); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1220 |
jcc(Assembler::zero, EXTRACT_TAIL_2BYTES); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1221 |
pextrd(Address(dest_addr, pos), xmm0, 0); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1222 |
psrldq(xmm0, 4); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1223 |
addq(pos, 4); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1224 |
bind(EXTRACT_TAIL_2BYTES); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1225 |
testptr(len_reg, 2); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1226 |
jcc(Assembler::zero, EXTRACT_TAIL_1BYTE); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1227 |
pextrw(Address(dest_addr, pos), xmm0, 0); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1228 |
psrldq(xmm0, 2); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1229 |
addl(pos, 2); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1230 |
bind(EXTRACT_TAIL_1BYTE); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1231 |
testptr(len_reg, 1); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1232 |
jcc(Assembler::zero, END); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1233 |
pextrb(Address(dest_addr, pos), xmm0, 0); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1234 |
addl(pos, 1); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1235 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1236 |
bind(END); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1237 |
// If there are no tail bytes, store counter value and exit |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1238 |
cmpl(len_reg, 0); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1239 |
jcc(Assembler::equal, STORE_CTR); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1240 |
movl(Address(used_addr, 0), len_reg); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1241 |
|
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1242 |
bind(STORE_CTR); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1243 |
//shuffle updated counter and store it |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1244 |
vpshufb(xmm8, xmm8, xmm16, Assembler::AVX_128bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1245 |
movdqu(Address(counter, 0), xmm8); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1246 |
// Zero out counter and key registers |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1247 |
evpxorq(xmm8, xmm8, xmm8, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1248 |
evpxorq(xmm20, xmm20, xmm20, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1249 |
evpxorq(xmm21, xmm21, xmm21, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1250 |
evpxorq(xmm22, xmm22, xmm22, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1251 |
evpxorq(xmm23, xmm23, xmm23, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1252 |
evpxorq(xmm24, xmm24, xmm24, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1253 |
evpxorq(xmm25, xmm25, xmm25, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1254 |
evpxorq(xmm26, xmm26, xmm26, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1255 |
evpxorq(xmm27, xmm27, xmm27, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1256 |
evpxorq(xmm28, xmm28, xmm28, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1257 |
evpxorq(xmm29, xmm29, xmm29, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1258 |
evpxorq(xmm30, xmm30, xmm30, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1259 |
cmpl(rounds, 44); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1260 |
jcc(Assembler::belowEqual, EXIT); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1261 |
evpxorq(xmm18, xmm18, xmm18, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1262 |
evpxorq(xmm5, xmm5, xmm5, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1263 |
cmpl(rounds, 52); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1264 |
jcc(Assembler::belowEqual, EXIT); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1265 |
evpxorq(xmm6, xmm6, xmm6, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1266 |
evpxorq(xmm7, xmm7, xmm7, Assembler::AVX_512bit); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1267 |
bind(EXIT); |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1268 |
} |
c6a789f495fe
8233741: AES Countermode (AES-CTR) optimization using AVX512 + VAES instructions
kvn
parents:
57786
diff
changeset
|
1269 |
|
53017
e10a1f7aaa13
8215354: x86_32 build failures after JDK-8214074 (Ghash optimization using AVX instructions)
shade
parents:
52990
diff
changeset
|
1270 |
#endif // _LP64 |