author | jlahoda |
Tue, 24 Sep 2019 15:40:26 +0200 | |
branch | JDK-8226585-branch |
changeset 58290 | d885633d9de4 |
parent 57786 | 948ac3112da8 |
child 58977 | c6a789f495fe |
permissions | -rw-r--r-- |
52990 | 1 |
/* |
2 |
* Copyright (c) 2018, Intel Corporation. |
|
3 |
* |
|
4 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
5 |
* |
|
6 |
* This code is free software; you can redistribute it and/or modify it |
|
7 |
* under the terms of the GNU General Public License version 2 only, as |
|
8 |
* published by the Free Software Foundation. |
|
9 |
* |
|
10 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
|
11 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
12 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
13 |
* version 2 for more details (a copy is included in the LICENSE file that |
|
14 |
* accompanied this code). |
|
15 |
* |
|
16 |
* You should have received a copy of the GNU General Public License version |
|
17 |
* 2 along with this work; if not, write to the Free Software Foundation, |
|
18 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
19 |
* |
|
20 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
21 |
* or visit www.oracle.com if you need additional information or have any |
|
22 |
* questions. |
|
23 |
* |
|
24 |
*/ |
|
25 |
||
26 |
#include "precompiled.hpp" |
|
27 |
#include "asm/assembler.hpp" |
|
28 |
#include "asm/assembler.inline.hpp" |
|
29 |
#include "runtime/stubRoutines.hpp" |
|
30 |
#include "macroAssembler_x86.hpp" |
|
31 |
||
53017
e10a1f7aaa13
8215354: x86_32 build failures after JDK-8214074 (Ghash optimization using AVX instructions)
shade
parents:
52990
diff
changeset
|
32 |
#ifdef _LP64 |
57786
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
33 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
34 |
// Emits one vaesenc per data register, covering xmm0 through xmm<rnum>
// inclusive. Every instruction uses the same round key and the 512-bit
// vector length, and each register is updated in place.
//
//   key  - register holding the round key passed to every vaesenc
//   rnum - index of the LAST register to process (not a count);
//          rnum == 7 emits eight instructions, for registers 0..7
void MacroAssembler::roundEnc(XMMRegister key, int rnum) {
  for (int idx = 0; idx <= rnum; ++idx) {
    const XMMRegister data = as_XMMRegister(idx);
    vaesenc(data, data, key, Assembler::AVX_512bit);
  }
}
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
39 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
40 |
// Emits one vaesenclast per data register, covering xmm0 through xmm<rnum>
// inclusive. Every instruction uses the same round key and the 512-bit
// vector length, and each register is updated in place.
//
//   key  - register holding the round key passed to every vaesenclast
//   rnum - index of the LAST register to process (not a count)
void MacroAssembler::lastroundEnc(XMMRegister key, int rnum) {
  for (int idx = 0; idx <= rnum; ++idx) {
    const XMMRegister data = as_XMMRegister(idx);
    vaesenclast(data, data, key, Assembler::AVX_512bit);
  }
}
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
45 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
46 |
// Emits one vaesdec per data register, covering xmm0 through xmm<rnum>
// inclusive. Every instruction uses the same round key and the 512-bit
// vector length, and each register is updated in place.
//
//   key  - register holding the round key passed to every vaesdec
//   rnum - index of the LAST register to process (not a count)
void MacroAssembler::roundDec(XMMRegister key, int rnum) {
  for (int idx = 0; idx <= rnum; ++idx) {
    const XMMRegister data = as_XMMRegister(idx);
    vaesdec(data, data, key, Assembler::AVX_512bit);
  }
}
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
51 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
52 |
// Emits one vaesdeclast per data register, covering xmm0 through xmm<rnum>
// inclusive. Every instruction uses the same round key and the 512-bit
// vector length, and each register is updated in place.
//
//   key  - register holding the round key passed to every vaesdeclast
//   rnum - index of the LAST register to process (not a count)
void MacroAssembler::lastroundDec(XMMRegister key, int rnum) {
  for (int idx = 0; idx <= rnum; ++idx) {
    const XMMRegister data = as_XMMRegister(idx);
    vaesdeclast(data, data, key, Assembler::AVX_512bit);
  }
}
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
57 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
58 |
// Load key and shuffle operation |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
59 |
void MacroAssembler::ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) { |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
60 |
movdqu(xmmdst, Address(key, offset)); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
61 |
if (xmm_shuf_mask != NULL) { |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
62 |
pshufb(xmmdst, xmm_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
63 |
} else { |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
64 |
pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
65 |
} |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
66 |
evshufi64x2(xmmdst, xmmdst, xmmdst, 0x0, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
67 |
} |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
68 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
69 |
// AES-ECB Encrypt Operation |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
70 |
void MacroAssembler::aesecb_encrypt(Register src_addr, Register dest_addr, Register key, Register len) { |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
71 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
72 |
const Register pos = rax; |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
73 |
const Register rounds = r12; |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
74 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
75 |
Label NO_PARTS, LOOP, Loop_start, LOOP2, AES192, END_LOOP, AES256, REMAINDER, LAST2, END, KEY_192, KEY_256, EXIT; |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
76 |
push(r13); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
77 |
push(r12); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
78 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
79 |
// For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
80 |
// context for the registers used, where all instructions below are using 128-bit mode |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
81 |
// On EVEX without VL and BW, these instructions will all be AVX. |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
82 |
if (VM_Version::supports_avx512vlbw()) { |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
83 |
movl(rax, 0xffff); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
84 |
kmovql(k1, rax); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
85 |
} |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
86 |
push(len); // Save |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
87 |
push(rbx); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
88 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
89 |
vzeroupper(); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
90 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
91 |
xorptr(pos, pos); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
92 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
93 |
// Calculate number of rounds based on key length(128, 192, 256):44 for 10-rounds, 52 for 12-rounds, 60 for 14-rounds |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
94 |
movl(rounds, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
95 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
96 |
// Load Key shuf mask |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
97 |
const XMMRegister xmm_key_shuf_mask = xmm31; // used temporarily to swap key bytes up front |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
98 |
movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
99 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
100 |
// Load and shuffle key based on number of rounds |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
101 |
ev_load_key(xmm8, key, 0 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
102 |
ev_load_key(xmm9, key, 1 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
103 |
ev_load_key(xmm10, key, 2 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
104 |
ev_load_key(xmm23, key, 3 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
105 |
ev_load_key(xmm12, key, 4 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
106 |
ev_load_key(xmm13, key, 5 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
107 |
ev_load_key(xmm14, key, 6 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
108 |
ev_load_key(xmm15, key, 7 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
109 |
ev_load_key(xmm16, key, 8 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
110 |
ev_load_key(xmm17, key, 9 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
111 |
ev_load_key(xmm24, key, 10 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
112 |
cmpl(rounds, 52); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
113 |
jcc(Assembler::greaterEqual, KEY_192); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
114 |
jmp(Loop_start); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
115 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
116 |
bind(KEY_192); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
117 |
ev_load_key(xmm19, key, 11 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
118 |
ev_load_key(xmm20, key, 12 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
119 |
cmpl(rounds, 60); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
120 |
jcc(Assembler::equal, KEY_256); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
121 |
jmp(Loop_start); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
122 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
123 |
bind(KEY_256); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
124 |
ev_load_key(xmm21, key, 13 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
125 |
ev_load_key(xmm22, key, 14 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
126 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
127 |
bind(Loop_start); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
128 |
movq(rbx, len); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
129 |
// Divide length by 16 to convert it to number of blocks |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
130 |
shrq(len, 4); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
131 |
shlq(rbx, 60); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
132 |
jcc(Assembler::equal, NO_PARTS); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
133 |
addq(len, 1); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
134 |
// Check if number of blocks is greater than or equal to 32 |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
135 |
// If true, 512 bytes are processed at a time (code marked by label LOOP) |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
136 |
// If not, 16 bytes are processed (code marked by REMAINDER label) |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
137 |
bind(NO_PARTS); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
138 |
movq(rbx, len); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
139 |
shrq(len, 5); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
140 |
jcc(Assembler::equal, REMAINDER); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
141 |
movl(r13, len); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
142 |
// Compute number of blocks that will be processed 512 bytes at a time |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
143 |
// Subtract this from the total number of blocks which will then be processed by REMAINDER loop |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
144 |
shlq(r13, 5); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
145 |
subq(rbx, r13); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
146 |
//Begin processing 512 bytes |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
147 |
bind(LOOP); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
148 |
// Move 64 bytes of PT data into a zmm register, as a result 512 bytes of PT loaded in zmm0-7 |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
149 |
evmovdquq(xmm0, Address(src_addr, pos, Address::times_1, 0 * 64), Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
150 |
evmovdquq(xmm1, Address(src_addr, pos, Address::times_1, 1 * 64), Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
151 |
evmovdquq(xmm2, Address(src_addr, pos, Address::times_1, 2 * 64), Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
152 |
evmovdquq(xmm3, Address(src_addr, pos, Address::times_1, 3 * 64), Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
153 |
evmovdquq(xmm4, Address(src_addr, pos, Address::times_1, 4 * 64), Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
154 |
evmovdquq(xmm5, Address(src_addr, pos, Address::times_1, 5 * 64), Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
155 |
evmovdquq(xmm6, Address(src_addr, pos, Address::times_1, 6 * 64), Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
156 |
evmovdquq(xmm7, Address(src_addr, pos, Address::times_1, 7 * 64), Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
157 |
// Xor with the first round key |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
158 |
evpxorq(xmm0, xmm0, xmm8, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
159 |
evpxorq(xmm1, xmm1, xmm8, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
160 |
evpxorq(xmm2, xmm2, xmm8, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
161 |
evpxorq(xmm3, xmm3, xmm8, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
162 |
evpxorq(xmm4, xmm4, xmm8, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
163 |
evpxorq(xmm5, xmm5, xmm8, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
164 |
evpxorq(xmm6, xmm6, xmm8, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
165 |
evpxorq(xmm7, xmm7, xmm8, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
166 |
// 9 Aes encode round operations |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
167 |
roundEnc(xmm9, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
168 |
roundEnc(xmm10, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
169 |
roundEnc(xmm23, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
170 |
roundEnc(xmm12, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
171 |
roundEnc(xmm13, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
172 |
roundEnc(xmm14, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
173 |
roundEnc(xmm15, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
174 |
roundEnc(xmm16, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
175 |
roundEnc(xmm17, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
176 |
cmpl(rounds, 52); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
177 |
jcc(Assembler::aboveEqual, AES192); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
178 |
// Aesenclast round operation for keysize = 128 |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
179 |
lastroundEnc(xmm24, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
180 |
jmp(END_LOOP); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
181 |
//Additional 2 rounds of Aesenc operation for keysize = 192 |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
182 |
bind(AES192); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
183 |
roundEnc(xmm24, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
184 |
roundEnc(xmm19, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
185 |
cmpl(rounds, 60); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
186 |
jcc(Assembler::aboveEqual, AES256); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
187 |
// Aesenclast round for keysize = 192 |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
188 |
lastroundEnc(xmm20, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
189 |
jmp(END_LOOP); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
190 |
// 2 rounds of Aesenc operation and Aesenclast for keysize = 256 |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
191 |
bind(AES256); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
192 |
roundEnc(xmm20, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
193 |
roundEnc(xmm21, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
194 |
lastroundEnc(xmm22, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
195 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
196 |
bind(END_LOOP); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
197 |
// Move 512 bytes of CT to destination |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
198 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 0 * 64), xmm0, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
199 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 1 * 64), xmm1, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
200 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 2 * 64), xmm2, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
201 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 3 * 64), xmm3, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
202 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 4 * 64), xmm4, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
203 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 5 * 64), xmm5, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
204 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 6 * 64), xmm6, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
205 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 7 * 64), xmm7, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
206 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
207 |
addq(pos, 512); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
208 |
decq(len); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
209 |
jcc(Assembler::notEqual, LOOP); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
210 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
211 |
bind(REMAINDER); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
212 |
vzeroupper(); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
213 |
cmpq(rbx, 0); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
214 |
jcc(Assembler::equal, END); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
215 |
// Process 16 bytes at a time |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
216 |
bind(LOOP2); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
217 |
movdqu(xmm1, Address(src_addr, pos, Address::times_1, 0)); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
218 |
vpxor(xmm1, xmm1, xmm8, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
219 |
// xmm2 contains shuffled key for Aesenclast operation. |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
220 |
vmovdqu(xmm2, xmm24); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
221 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
222 |
vaesenc(xmm1, xmm1, xmm9, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
223 |
vaesenc(xmm1, xmm1, xmm10, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
224 |
vaesenc(xmm1, xmm1, xmm23, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
225 |
vaesenc(xmm1, xmm1, xmm12, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
226 |
vaesenc(xmm1, xmm1, xmm13, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
227 |
vaesenc(xmm1, xmm1, xmm14, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
228 |
vaesenc(xmm1, xmm1, xmm15, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
229 |
vaesenc(xmm1, xmm1, xmm16, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
230 |
vaesenc(xmm1, xmm1, xmm17, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
231 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
232 |
cmpl(rounds, 52); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
233 |
jcc(Assembler::below, LAST2); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
234 |
vmovdqu(xmm2, xmm20); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
235 |
vaesenc(xmm1, xmm1, xmm24, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
236 |
vaesenc(xmm1, xmm1, xmm19, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
237 |
cmpl(rounds, 60); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
238 |
jcc(Assembler::below, LAST2); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
239 |
vmovdqu(xmm2, xmm22); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
240 |
vaesenc(xmm1, xmm1, xmm20, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
241 |
vaesenc(xmm1, xmm1, xmm21, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
242 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
243 |
bind(LAST2); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
244 |
// Aesenclast round |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
245 |
vaesenclast(xmm1, xmm1, xmm2, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
246 |
// Write 16 bytes of CT to destination |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
247 |
movdqu(Address(dest_addr, pos, Address::times_1, 0), xmm1); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
248 |
addq(pos, 16); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
249 |
decq(rbx); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
250 |
jcc(Assembler::notEqual, LOOP2); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
251 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
252 |
bind(END); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
253 |
// Zero out the round keys |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
254 |
evpxorq(xmm8, xmm8, xmm8, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
255 |
evpxorq(xmm9, xmm9, xmm9, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
256 |
evpxorq(xmm10, xmm10, xmm10, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
257 |
evpxorq(xmm23, xmm23, xmm23, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
258 |
evpxorq(xmm12, xmm12, xmm12, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
259 |
evpxorq(xmm13, xmm13, xmm13, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
260 |
evpxorq(xmm14, xmm14, xmm14, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
261 |
evpxorq(xmm15, xmm15, xmm15, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
262 |
evpxorq(xmm16, xmm16, xmm16, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
263 |
evpxorq(xmm17, xmm17, xmm17, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
264 |
evpxorq(xmm24, xmm24, xmm24, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
265 |
cmpl(rounds, 44); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
266 |
jcc(Assembler::belowEqual, EXIT); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
267 |
evpxorq(xmm19, xmm19, xmm19, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
268 |
evpxorq(xmm20, xmm20, xmm20, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
269 |
cmpl(rounds, 52); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
270 |
jcc(Assembler::belowEqual, EXIT); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
271 |
evpxorq(xmm21, xmm21, xmm21, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
272 |
evpxorq(xmm22, xmm22, xmm22, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
273 |
bind(EXIT); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
274 |
pop(rbx); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
275 |
pop(rax); // return length |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
276 |
pop(r12); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
277 |
pop(r13); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
278 |
} |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
279 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
280 |
// AES-ECB Decrypt Operation |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
281 |
void MacroAssembler::aesecb_decrypt(Register src_addr, Register dest_addr, Register key, Register len) { |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
282 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
283 |
Label NO_PARTS, LOOP, Loop_start, LOOP2, AES192, END_LOOP, AES256, REMAINDER, LAST2, END, KEY_192, KEY_256, EXIT; |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
284 |
const Register pos = rax; |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
285 |
const Register rounds = r12; |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
286 |
push(r13); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
287 |
push(r12); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
288 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
289 |
// For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
290 |
// context for the registers used, where all instructions below are using 128-bit mode |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
291 |
// On EVEX without VL and BW, these instructions will all be AVX. |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
292 |
if (VM_Version::supports_avx512vlbw()) { |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
293 |
movl(rax, 0xffff); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
294 |
kmovql(k1, rax); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
295 |
} |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
296 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
297 |
push(len); // Save |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
298 |
push(rbx); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
299 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
300 |
vzeroupper(); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
301 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
302 |
xorptr(pos, pos); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
303 |
// Calculate number of rounds i.e. based on key length(128, 192, 256):44 for 10-rounds, 52 for 12-rounds, 60 for 14-rounds |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
304 |
movl(rounds, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
305 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
306 |
// Load Key shuf mask |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
307 |
const XMMRegister xmm_key_shuf_mask = xmm31; // used temporarily to swap key bytes up front |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
308 |
movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
309 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
310 |
// Load and shuffle round keys. The java expanded key ordering is rotated one position in decryption. |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
311 |
// So the first round key is loaded from 1*16 here and last round key is loaded from 0*16 |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
312 |
ev_load_key(xmm9, key, 1 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
313 |
ev_load_key(xmm10, key, 2 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
314 |
ev_load_key(xmm11, key, 3 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
315 |
ev_load_key(xmm12, key, 4 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
316 |
ev_load_key(xmm13, key, 5 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
317 |
ev_load_key(xmm14, key, 6 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
318 |
ev_load_key(xmm15, key, 7 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
319 |
ev_load_key(xmm16, key, 8 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
320 |
ev_load_key(xmm17, key, 9 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
321 |
ev_load_key(xmm18, key, 10 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
322 |
ev_load_key(xmm27, key, 0 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
323 |
cmpl(rounds, 52); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
324 |
jcc(Assembler::greaterEqual, KEY_192); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
325 |
jmp(Loop_start); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
326 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
327 |
bind(KEY_192); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
328 |
ev_load_key(xmm19, key, 11 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
329 |
ev_load_key(xmm20, key, 12 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
330 |
cmpl(rounds, 60); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
331 |
jcc(Assembler::equal, KEY_256); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
332 |
jmp(Loop_start); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
333 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
334 |
bind(KEY_256); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
335 |
ev_load_key(xmm21, key, 13 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
336 |
ev_load_key(xmm22, key, 14 * 16, xmm_key_shuf_mask); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
337 |
bind(Loop_start); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
338 |
movq(rbx, len); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
339 |
// Convert input length to number of blocks |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
340 |
shrq(len, 4); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
341 |
shlq(rbx, 60); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
342 |
jcc(Assembler::equal, NO_PARTS); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
343 |
addq(len, 1); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
344 |
// Check if number of blocks is greater than or equal to 32 |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
345 |
// If true, then 512 bytes (32 blocks) are processed at a time (code marked by label LOOP) |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
346 |
// If not, 16 bytes are processed (code marked by label REMAINDER) |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
347 |
bind(NO_PARTS); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
348 |
movq(rbx, len); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
349 |
shrq(len, 5); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
350 |
jcc(Assembler::equal, REMAINDER); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
351 |
movl(r13, len); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
352 |
// Compute number of blocks that will be processed as 512 bytes at a time |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
353 |
// Subtract this from the total number of blocks, which will then be processed by REMAINDER loop. |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
354 |
shlq(r13, 5); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
355 |
subq(rbx, r13); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
356 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
357 |
bind(LOOP); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
358 |
// Move 64 bytes of CT data into a zmm register, as a result 512 bytes of CT loaded in zmm0-7 |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
359 |
evmovdquq(xmm0, Address(src_addr, pos, Address::times_1, 0 * 64), Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
360 |
evmovdquq(xmm1, Address(src_addr, pos, Address::times_1, 1 * 64), Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
361 |
evmovdquq(xmm2, Address(src_addr, pos, Address::times_1, 2 * 64), Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
362 |
evmovdquq(xmm3, Address(src_addr, pos, Address::times_1, 3 * 64), Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
363 |
evmovdquq(xmm4, Address(src_addr, pos, Address::times_1, 4 * 64), Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
364 |
evmovdquq(xmm5, Address(src_addr, pos, Address::times_1, 5 * 64), Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
365 |
evmovdquq(xmm6, Address(src_addr, pos, Address::times_1, 6 * 64), Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
366 |
evmovdquq(xmm7, Address(src_addr, pos, Address::times_1, 7 * 64), Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
367 |
// Xor with the first round key |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
368 |
evpxorq(xmm0, xmm0, xmm9, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
369 |
evpxorq(xmm1, xmm1, xmm9, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
370 |
evpxorq(xmm2, xmm2, xmm9, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
371 |
evpxorq(xmm3, xmm3, xmm9, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
372 |
evpxorq(xmm4, xmm4, xmm9, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
373 |
evpxorq(xmm5, xmm5, xmm9, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
374 |
evpxorq(xmm6, xmm6, xmm9, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
375 |
evpxorq(xmm7, xmm7, xmm9, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
376 |
// 9 rounds of Aesdec |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
377 |
roundDec(xmm10, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
378 |
roundDec(xmm11, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
379 |
roundDec(xmm12, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
380 |
roundDec(xmm13, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
381 |
roundDec(xmm14, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
382 |
roundDec(xmm15, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
383 |
roundDec(xmm16, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
384 |
roundDec(xmm17, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
385 |
roundDec(xmm18, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
386 |
cmpl(rounds, 52); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
387 |
jcc(Assembler::aboveEqual, AES192); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
388 |
// Aesdeclast round for keysize = 128 |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
389 |
lastroundDec(xmm27, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
390 |
jmp(END_LOOP); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
391 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
392 |
bind(AES192); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
393 |
// 2 Additional rounds for keysize = 192 |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
394 |
roundDec(xmm19, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
395 |
roundDec(xmm20, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
396 |
cmpl(rounds, 60); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
397 |
jcc(Assembler::aboveEqual, AES256); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
398 |
// Aesdeclast round for keysize = 192 |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
399 |
lastroundDec(xmm27, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
400 |
jmp(END_LOOP); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
401 |
bind(AES256); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
402 |
// 2 Additional rounds and Aesdeclast for keysize = 256 |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
403 |
roundDec(xmm21, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
404 |
roundDec(xmm22, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
405 |
lastroundDec(xmm27, 7); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
406 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
407 |
bind(END_LOOP); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
408 |
// Write 512 bytes of PT to the destination |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
409 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 0 * 64), xmm0, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
410 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 1 * 64), xmm1, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
411 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 2 * 64), xmm2, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
412 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 3 * 64), xmm3, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
413 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 4 * 64), xmm4, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
414 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 5 * 64), xmm5, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
415 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 6 * 64), xmm6, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
416 |
evmovdquq(Address(dest_addr, pos, Address::times_1, 7 * 64), xmm7, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
417 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
418 |
addq(pos, 512); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
419 |
decq(len); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
420 |
jcc(Assembler::notEqual, LOOP); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
421 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
422 |
bind(REMAINDER); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
423 |
vzeroupper(); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
424 |
cmpq(rbx, 0); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
425 |
jcc(Assembler::equal, END); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
426 |
// Process 16 bytes at a time |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
427 |
bind(LOOP2); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
428 |
movdqu(xmm1, Address(src_addr, pos, Address::times_1, 0)); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
429 |
vpxor(xmm1, xmm1, xmm9, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
430 |
// xmm2 contains shuffled key for Aesdeclast operation. |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
431 |
vmovdqu(xmm2, xmm27); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
432 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
433 |
vaesdec(xmm1, xmm1, xmm10, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
434 |
vaesdec(xmm1, xmm1, xmm11, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
435 |
vaesdec(xmm1, xmm1, xmm12, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
436 |
vaesdec(xmm1, xmm1, xmm13, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
437 |
vaesdec(xmm1, xmm1, xmm14, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
438 |
vaesdec(xmm1, xmm1, xmm15, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
439 |
vaesdec(xmm1, xmm1, xmm16, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
440 |
vaesdec(xmm1, xmm1, xmm17, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
441 |
vaesdec(xmm1, xmm1, xmm18, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
442 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
443 |
cmpl(rounds, 52); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
444 |
jcc(Assembler::below, LAST2); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
445 |
vaesdec(xmm1, xmm1, xmm19, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
446 |
vaesdec(xmm1, xmm1, xmm20, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
447 |
cmpl(rounds, 60); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
448 |
jcc(Assembler::below, LAST2); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
449 |
vaesdec(xmm1, xmm1, xmm21, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
450 |
vaesdec(xmm1, xmm1, xmm22, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
451 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
452 |
bind(LAST2); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
453 |
// Aesdeclast round |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
454 |
vaesdeclast(xmm1, xmm1, xmm2, Assembler::AVX_128bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
455 |
// Write 16 bytes of PT to destination |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
456 |
movdqu(Address(dest_addr, pos, Address::times_1, 0), xmm1); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
457 |
addq(pos, 16); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
458 |
decq(rbx); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
459 |
jcc(Assembler::notEqual, LOOP2); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
460 |
|
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
461 |
bind(END); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
462 |
// Zero out the round keys |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
463 |
evpxorq(xmm8, xmm8, xmm8, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
464 |
evpxorq(xmm9, xmm9, xmm9, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
465 |
evpxorq(xmm10, xmm10, xmm10, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
466 |
evpxorq(xmm11, xmm11, xmm11, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
467 |
evpxorq(xmm12, xmm12, xmm12, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
468 |
evpxorq(xmm13, xmm13, xmm13, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
469 |
evpxorq(xmm14, xmm14, xmm14, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
470 |
evpxorq(xmm15, xmm15, xmm15, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
471 |
evpxorq(xmm16, xmm16, xmm16, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
472 |
evpxorq(xmm17, xmm17, xmm17, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
473 |
evpxorq(xmm18, xmm18, xmm18, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
474 |
evpxorq(xmm27, xmm27, xmm27, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
475 |
cmpl(rounds, 44); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
476 |
jcc(Assembler::belowEqual, EXIT); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
477 |
evpxorq(xmm19, xmm19, xmm19, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
478 |
evpxorq(xmm20, xmm20, xmm20, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
479 |
cmpl(rounds, 52); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
480 |
jcc(Assembler::belowEqual, EXIT); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
481 |
evpxorq(xmm21, xmm21, xmm21, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
482 |
evpxorq(xmm22, xmm22, xmm22, Assembler::AVX_512bit); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
483 |
bind(EXIT); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
484 |
pop(rbx); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
485 |
pop(rax); // return length |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
486 |
pop(r12); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
487 |
pop(r13); |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
488 |
} |
948ac3112da8
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
srukmannagar
parents:
53017
diff
changeset
|
489 |
|
52990 | 490 |
// Multiply 128 x 128 bits, using 4 pclmulqdq operations |
491 |
void MacroAssembler::schoolbookAAD(int i, Register htbl, XMMRegister data, |
|
492 |
XMMRegister tmp0, XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3) { |
|
493 |
movdqu(xmm15, Address(htbl, i * 16)); |
|
494 |
vpclmulhqlqdq(tmp3, data, xmm15); // 0x01 |
|
495 |
vpxor(tmp2, tmp2, tmp3, Assembler::AVX_128bit); |
|
496 |
vpclmulldq(tmp3, data, xmm15); // 0x00 |
|
497 |
vpxor(tmp0, tmp0, tmp3, Assembler::AVX_128bit); |
|
498 |
vpclmulhdq(tmp3, data, xmm15); // 0x11 |
|
499 |
vpxor(tmp1, tmp1, tmp3, Assembler::AVX_128bit); |
|
500 |
vpclmullqhqdq(tmp3, data, xmm15); // 0x10 |
|
501 |
vpxor(tmp2, tmp2, tmp3, Assembler::AVX_128bit); |
|
502 |
} |
|
503 |
||
504 |
// Multiply two 128 bit numbers resulting in a 256 bit value |
|
505 |
// Result of the multiplication followed by reduction stored in state |
|
506 |
void MacroAssembler::gfmul(XMMRegister tmp0, XMMRegister state) { |
|
507 |
const XMMRegister tmp1 = xmm4; |
|
508 |
const XMMRegister tmp2 = xmm5; |
|
509 |
const XMMRegister tmp3 = xmm6; |
|
510 |
const XMMRegister tmp4 = xmm7; |
|
511 |
||
512 |
vpclmulldq(tmp1, state, tmp0); //0x00 (a0 * b0) |
|
513 |
vpclmulhdq(tmp4, state, tmp0);//0x11 (a1 * b1) |
|
514 |
vpclmullqhqdq(tmp2, state, tmp0);//0x10 (a1 * b0) |
|
515 |
vpclmulhqlqdq(tmp3, state, tmp0); //0x01 (a0 * b1) |
|
516 |
||
517 |
vpxor(tmp2, tmp2, tmp3, Assembler::AVX_128bit); // (a0 * b1) + (a1 * b0) |
|
518 |
||
519 |
vpslldq(tmp3, tmp2, 8, Assembler::AVX_128bit); |
|
520 |
vpsrldq(tmp2, tmp2, 8, Assembler::AVX_128bit); |
|
521 |
vpxor(tmp1, tmp1, tmp3, Assembler::AVX_128bit); // tmp1 and tmp4 hold the result |
|
522 |
vpxor(tmp4, tmp4, tmp2, Assembler::AVX_128bit); // of carryless multiplication |
|
523 |
// Follows the reduction technique mentioned in |
|
524 |
// Shift-XOR reduction described in Gueron-Kounavis May 2010 |
|
525 |
// First phase of reduction |
|
526 |
// |
|
527 |
vpslld(xmm8, tmp1, 31, Assembler::AVX_128bit); // packed right shift shifting << 31 |
|
528 |
vpslld(xmm9, tmp1, 30, Assembler::AVX_128bit); // packed right shift shifting << 30 |
|
529 |
vpslld(xmm10, tmp1, 25, Assembler::AVX_128bit);// packed right shift shifting << 25 |
|
530 |
// xor the shifted versions |
|
531 |
vpxor(xmm8, xmm8, xmm9, Assembler::AVX_128bit); |
|
532 |
vpxor(xmm8, xmm8, xmm10, Assembler::AVX_128bit); |
|
533 |
vpslldq(xmm9, xmm8, 12, Assembler::AVX_128bit); |
|
534 |
vpsrldq(xmm8, xmm8, 4, Assembler::AVX_128bit); |
|
535 |
vpxor(tmp1, tmp1, xmm9, Assembler::AVX_128bit);// first phase of the reduction complete |
|
536 |
// |
|
537 |
// Second phase of the reduction |
|
538 |
// |
|
539 |
vpsrld(xmm9, tmp1, 1, Assembler::AVX_128bit);// packed left shifting >> 1 |
|
540 |
vpsrld(xmm10, tmp1, 2, Assembler::AVX_128bit);// packed left shifting >> 2 |
|
541 |
vpsrld(xmm11, tmp1, 7, Assembler::AVX_128bit);// packed left shifting >> 7 |
|
542 |
vpxor(xmm9, xmm9, xmm10, Assembler::AVX_128bit);// xor the shifted versions |
|
543 |
vpxor(xmm9, xmm9, xmm11, Assembler::AVX_128bit); |
|
544 |
vpxor(xmm9, xmm9, xmm8, Assembler::AVX_128bit); |
|
545 |
vpxor(tmp1, tmp1, xmm9, Assembler::AVX_128bit); |
|
546 |
vpxor(state, tmp4, tmp1, Assembler::AVX_128bit);// the result is in state |
|
547 |
ret(0); |
|
548 |
} |
|
549 |
||
550 |
// This method takes the subkey after expansion as input and generates 1 * 16 power of subkey H. |
|
551 |
// The power of H is used in reduction process for one block ghash |
|
552 |
void MacroAssembler::generateHtbl_one_block(Register htbl) { |
|
553 |
const XMMRegister t = xmm13; |
|
554 |
||
555 |
// load the original subkey hash |
|
556 |
movdqu(t, Address(htbl, 0)); |
|
557 |
// shuffle using long swap mask |
|
558 |
movdqu(xmm10, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr())); |
|
559 |
vpshufb(t, t, xmm10, Assembler::AVX_128bit); |
|
560 |
||
561 |
// Compute H' = GFMUL(H, 2) |
|
562 |
vpsrld(xmm3, t, 7, Assembler::AVX_128bit); |
|
563 |
movdqu(xmm4, ExternalAddress(StubRoutines::x86::ghash_shufflemask_addr())); |
|
564 |
vpshufb(xmm3, xmm3, xmm4, Assembler::AVX_128bit); |
|
565 |
movl(rax, 0xff00); |
|
566 |
movdl(xmm4, rax); |
|
567 |
vpshufb(xmm4, xmm4, xmm3, Assembler::AVX_128bit); |
|
568 |
movdqu(xmm5, ExternalAddress(StubRoutines::x86::ghash_polynomial_addr())); |
|
569 |
vpand(xmm5, xmm5, xmm4, Assembler::AVX_128bit); |
|
570 |
vpsrld(xmm3, t, 31, Assembler::AVX_128bit); |
|
571 |
vpslld(xmm4, t, 1, Assembler::AVX_128bit); |
|
572 |
vpslldq(xmm3, xmm3, 4, Assembler::AVX_128bit); |
|
573 |
vpxor(t, xmm4, xmm3, Assembler::AVX_128bit);// t holds p(x) <<1 or H * 2 |
|
574 |
||
575 |
//Adding p(x)<<1 to xmm5 which holds the reduction polynomial |
|
576 |
vpxor(t, t, xmm5, Assembler::AVX_128bit); |
|
577 |
movdqu(Address(htbl, 1 * 16), t); // H * 2 |
|
578 |
||
579 |
ret(0); |
|
580 |
} |
|
581 |
||
582 |
// This method takes the subkey after expansion as input and generates the remaining powers of subkey H. |
|
583 |
// The power of H is used in reduction process for eight block ghash |
|
584 |
void MacroAssembler::generateHtbl_eight_blocks(Register htbl) { |
|
585 |
const XMMRegister t = xmm13; |
|
586 |
const XMMRegister tmp0 = xmm1; |
|
587 |
Label GFMUL; |
|
588 |
||
589 |
movdqu(t, Address(htbl, 1 * 16)); |
|
590 |
movdqu(tmp0, t); |
|
591 |
||
592 |
// tmp0 and t hold H. Now we compute powers of H by using GFMUL(H, H) |
|
593 |
call(GFMUL, relocInfo::none); |
|
594 |
movdqu(Address(htbl, 2 * 16), t); //H ^ 2 * 2 |
|
595 |
call(GFMUL, relocInfo::none); |
|
596 |
movdqu(Address(htbl, 3 * 16), t); //H ^ 3 * 2 |
|
597 |
call(GFMUL, relocInfo::none); |
|
598 |
movdqu(Address(htbl, 4 * 16), t); //H ^ 4 * 2 |
|
599 |
call(GFMUL, relocInfo::none); |
|
600 |
movdqu(Address(htbl, 5 * 16), t); //H ^ 5 * 2 |
|
601 |
call(GFMUL, relocInfo::none); |
|
602 |
movdqu(Address(htbl, 6 * 16), t); //H ^ 6 * 2 |
|
603 |
call(GFMUL, relocInfo::none); |
|
604 |
movdqu(Address(htbl, 7 * 16), t); //H ^ 7 * 2 |
|
605 |
call(GFMUL, relocInfo::none); |
|
606 |
movdqu(Address(htbl, 8 * 16), t); //H ^ 8 * 2 |
|
607 |
ret(0); |
|
608 |
||
609 |
bind(GFMUL); |
|
610 |
gfmul(tmp0, t); |
|
611 |
} |
|
612 |
||
613 |
// Multiblock and single block GHASH computation using Shift XOR reduction technique |
|
614 |
void MacroAssembler::avx_ghash(Register input_state, Register htbl, |
|
615 |
Register input_data, Register blocks) { |
|
616 |
||
617 |
// temporary variables to hold input data and input state |
|
618 |
const XMMRegister data = xmm1; |
|
619 |
const XMMRegister state = xmm0; |
|
620 |
// temporary variables to hold intermediate results |
|
621 |
const XMMRegister tmp0 = xmm3; |
|
622 |
const XMMRegister tmp1 = xmm4; |
|
623 |
const XMMRegister tmp2 = xmm5; |
|
624 |
const XMMRegister tmp3 = xmm6; |
|
625 |
// temporary variables to hold byte and long swap masks |
|
626 |
const XMMRegister bswap_mask = xmm2; |
|
627 |
const XMMRegister lswap_mask = xmm14; |
|
628 |
||
629 |
Label GENERATE_HTBL_1_BLK, GENERATE_HTBL_8_BLKS, BEGIN_PROCESS, GFMUL, BLOCK8_REDUCTION, |
|
630 |
ONE_BLK_INIT, PROCESS_1_BLOCK, PROCESS_8_BLOCKS, SAVE_STATE, EXIT_GHASH; |
|
631 |
||
632 |
testptr(blocks, blocks); |
|
633 |
jcc(Assembler::zero, EXIT_GHASH); |
|
634 |
||
635 |
// Check if Hashtable (1*16) has been already generated |
|
636 |
// For anything less than 8 blocks, we generate only the first power of H. |
|
637 |
movdqu(tmp2, Address(htbl, 1 * 16)); |
|
638 |
ptest(tmp2, tmp2); |
|
639 |
jcc(Assembler::notZero, BEGIN_PROCESS); |
|
640 |
call(GENERATE_HTBL_1_BLK, relocInfo::none); |
|
641 |
||
642 |
// Shuffle the input state |
|
643 |
bind(BEGIN_PROCESS); |
|
644 |
movdqu(lswap_mask, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr())); |
|
645 |
movdqu(state, Address(input_state, 0)); |
|
646 |
vpshufb(state, state, lswap_mask, Assembler::AVX_128bit); |
|
647 |
||
648 |
cmpl(blocks, 8); |
|
649 |
jcc(Assembler::below, ONE_BLK_INIT); |
|
650 |
// If we have 8 blocks or more data, then generate remaining powers of H |
|
651 |
movdqu(tmp2, Address(htbl, 8 * 16)); |
|
652 |
ptest(tmp2, tmp2); |
|
653 |
jcc(Assembler::notZero, PROCESS_8_BLOCKS); |
|
654 |
call(GENERATE_HTBL_8_BLKS, relocInfo::none); |
|
655 |
||
656 |
//Do 8 multiplies followed by a reduction processing 8 blocks of data at a time |
|
657 |
//Each block = 16 bytes. |
|
658 |
bind(PROCESS_8_BLOCKS); |
|
659 |
subl(blocks, 8); |
|
660 |
movdqu(bswap_mask, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr())); |
|
661 |
movdqu(data, Address(input_data, 16 * 7)); |
|
662 |
vpshufb(data, data, bswap_mask, Assembler::AVX_128bit); |
|
663 |
//Loading 1*16 as calculated powers of H required starts at that location. |
|
664 |
movdqu(xmm15, Address(htbl, 1 * 16)); |
|
665 |
//Perform carryless multiplication of (H*2, data block #7) |
|
666 |
vpclmulhqlqdq(tmp2, data, xmm15);//a0 * b1 |
|
667 |
vpclmulldq(tmp0, data, xmm15);//a0 * b0 |
|
668 |
vpclmulhdq(tmp1, data, xmm15);//a1 * b1 |
|
669 |
vpclmullqhqdq(tmp3, data, xmm15);//a1* b0 |
|
670 |
vpxor(tmp2, tmp2, tmp3, Assembler::AVX_128bit);// (a0 * b1) + (a1 * b0) |
|
671 |
||
672 |
movdqu(data, Address(input_data, 16 * 6)); |
|
673 |
vpshufb(data, data, bswap_mask, Assembler::AVX_128bit); |
|
674 |
// Perform carryless multiplication of (H^2 * 2, data block #6) |
|
675 |
schoolbookAAD(2, htbl, data, tmp0, tmp1, tmp2, tmp3); |
|
676 |
||
677 |
movdqu(data, Address(input_data, 16 * 5)); |
|
678 |
vpshufb(data, data, bswap_mask, Assembler::AVX_128bit); |
|
679 |
// Perform carryless multiplication of (H^3 * 2, data block #5) |
|
680 |
schoolbookAAD(3, htbl, data, tmp0, tmp1, tmp2, tmp3); |
|
681 |
movdqu(data, Address(input_data, 16 * 4)); |
|
682 |
vpshufb(data, data, bswap_mask, Assembler::AVX_128bit); |
|
683 |
// Perform carryless multiplication of (H^4 * 2, data block #4) |
|
684 |
schoolbookAAD(4, htbl, data, tmp0, tmp1, tmp2, tmp3); |
|
685 |
movdqu(data, Address(input_data, 16 * 3)); |
|
686 |
vpshufb(data, data, bswap_mask, Assembler::AVX_128bit); |
|
687 |
// Perform carryless multiplication of (H^5 * 2, data block #3) |
|
688 |
schoolbookAAD(5, htbl, data, tmp0, tmp1, tmp2, tmp3); |
|
689 |
movdqu(data, Address(input_data, 16 * 2)); |
|
690 |
vpshufb(data, data, bswap_mask, Assembler::AVX_128bit); |
|
691 |
// Perform carryless multiplication of (H^6 * 2, data block #2) |
|
692 |
schoolbookAAD(6, htbl, data, tmp0, tmp1, tmp2, tmp3); |
|
693 |
movdqu(data, Address(input_data, 16 * 1)); |
|
694 |
vpshufb(data, data, bswap_mask, Assembler::AVX_128bit); |
|
695 |
// Perform carryless multiplication of (H^7 * 2, data block #1) |
|
696 |
schoolbookAAD(7, htbl, data, tmp0, tmp1, tmp2, tmp3); |
|
697 |
movdqu(data, Address(input_data, 16 * 0)); |
|
698 |
// xor data block#0 with input state before perfoming carry-less multiplication |
|
699 |
vpshufb(data, data, bswap_mask, Assembler::AVX_128bit); |
|
700 |
vpxor(data, data, state, Assembler::AVX_128bit); |
|
701 |
// Perform carryless multiplication of (H^8 * 2, data block #0) |
|
702 |
schoolbookAAD(8, htbl, data, tmp0, tmp1, tmp2, tmp3); |
|
703 |
vpslldq(tmp3, tmp2, 8, Assembler::AVX_128bit); |
|
704 |
vpsrldq(tmp2, tmp2, 8, Assembler::AVX_128bit); |
|
705 |
vpxor(tmp0, tmp0, tmp3, Assembler::AVX_128bit);// tmp0, tmp1 contains aggregated results of |
|
706 |
vpxor(tmp1, tmp1, tmp2, Assembler::AVX_128bit);// the multiplication operation |
|
707 |
||
708 |
// we have the 2 128-bit partially accumulated multiplication results in tmp0:tmp1 |
|
709 |
// with higher 128-bit in tmp1 and lower 128-bit in corresponding tmp0 |
|
710 |
// Follows the reduction technique mentioned in |
|
711 |
// Shift-XOR reduction described in Gueron-Kounavis May 2010 |
|
712 |
bind(BLOCK8_REDUCTION); |
|
713 |
// First Phase of the reduction |
|
714 |
vpslld(xmm8, tmp0, 31, Assembler::AVX_128bit); // packed right shifting << 31 |
|
715 |
vpslld(xmm9, tmp0, 30, Assembler::AVX_128bit); // packed right shifting << 30 |
|
716 |
vpslld(xmm10, tmp0, 25, Assembler::AVX_128bit); // packed right shifting << 25 |
|
717 |
// xor the shifted versions |
|
718 |
vpxor(xmm8, xmm8, xmm10, Assembler::AVX_128bit); |
|
719 |
vpxor(xmm8, xmm8, xmm9, Assembler::AVX_128bit); |
|
720 |
||
721 |
vpslldq(xmm9, xmm8, 12, Assembler::AVX_128bit); |
|
722 |
vpsrldq(xmm8, xmm8, 4, Assembler::AVX_128bit); |
|
723 |
||
724 |
vpxor(tmp0, tmp0, xmm9, Assembler::AVX_128bit); // first phase of reduction is complete |
|
725 |
// second phase of the reduction |
|
726 |
vpsrld(xmm9, tmp0, 1, Assembler::AVX_128bit); // packed left shifting >> 1 |
|
727 |
vpsrld(xmm10, tmp0, 2, Assembler::AVX_128bit); // packed left shifting >> 2 |
|
728 |
vpsrld(tmp2, tmp0, 7, Assembler::AVX_128bit); // packed left shifting >> 7 |
|
729 |
// xor the shifted versions |
|
730 |
vpxor(xmm9, xmm9, xmm10, Assembler::AVX_128bit); |
|
731 |
vpxor(xmm9, xmm9, tmp2, Assembler::AVX_128bit); |
|
732 |
vpxor(xmm9, xmm9, xmm8, Assembler::AVX_128bit); |
|
733 |
vpxor(tmp0, xmm9, tmp0, Assembler::AVX_128bit); |
|
734 |
// Final result is in state |
|
735 |
vpxor(state, tmp0, tmp1, Assembler::AVX_128bit); |
|
736 |
||
737 |
lea(input_data, Address(input_data, 16 * 8)); |
|
738 |
cmpl(blocks, 8); |
|
739 |
jcc(Assembler::below, ONE_BLK_INIT); |
|
740 |
jmp(PROCESS_8_BLOCKS); |
|
741 |
||
742 |
// Since this is one block operation we will only use H * 2 i.e. the first power of H |
|
743 |
bind(ONE_BLK_INIT); |
|
744 |
movdqu(tmp0, Address(htbl, 1 * 16)); |
|
745 |
movdqu(bswap_mask, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr())); |
|
746 |
||
747 |
//Do one (128 bit x 128 bit) carry-less multiplication at a time followed by a reduction. |
|
748 |
bind(PROCESS_1_BLOCK); |
|
749 |
cmpl(blocks, 0); |
|
750 |
jcc(Assembler::equal, SAVE_STATE); |
|
751 |
subl(blocks, 1); |
|
752 |
movdqu(data, Address(input_data, 0)); |
|
753 |
vpshufb(data, data, bswap_mask, Assembler::AVX_128bit); |
|
754 |
vpxor(state, state, data, Assembler::AVX_128bit); |
|
755 |
// gfmul(H*2, state) |
|
756 |
call(GFMUL, relocInfo::none); |
|
757 |
addptr(input_data, 16); |
|
758 |
jmp(PROCESS_1_BLOCK); |
|
759 |
||
760 |
bind(SAVE_STATE); |
|
761 |
vpshufb(state, state, lswap_mask, Assembler::AVX_128bit); |
|
762 |
movdqu(Address(input_state, 0), state); |
|
763 |
jmp(EXIT_GHASH); |
|
764 |
||
765 |
bind(GFMUL); |
|
766 |
gfmul(tmp0, state); |
|
767 |
||
768 |
bind(GENERATE_HTBL_1_BLK); |
|
769 |
generateHtbl_one_block(htbl); |
|
770 |
||
771 |
bind(GENERATE_HTBL_8_BLKS); |
|
772 |
generateHtbl_eight_blocks(htbl); |
|
773 |
||
774 |
bind(EXIT_GHASH); |
|
775 |
// zero out xmm registers used for Htbl storage |
|
776 |
vpxor(xmm0, xmm0, xmm0, Assembler::AVX_128bit); |
|
777 |
vpxor(xmm1, xmm1, xmm1, Assembler::AVX_128bit); |
|
778 |
vpxor(xmm3, xmm3, xmm3, Assembler::AVX_128bit); |
|
779 |
vpxor(xmm15, xmm15, xmm15, Assembler::AVX_128bit); |
|
53017
e10a1f7aaa13
8215354: x86_32 build failures after JDK-8214074 (Ghash optimization using AVX instructions)
shade
parents:
52990
diff
changeset
|
780 |
} |
e10a1f7aaa13
8215354: x86_32 build failures after JDK-8214074 (Ghash optimization using AVX instructions)
shade
parents:
52990
diff
changeset
|
781 |
#endif // _LP64 |