author | kvn |
Wed, 21 Sep 2016 13:47:56 -0700 | |
changeset 41333 | ce08d64b41c7 |
parent 38135 | e06e2d071465 |
child 42039 | db627462f2c9 |
permissions | -rw-r--r-- |
36555 | 1 |
/* |
2 |
* Copyright (c) 2016, Intel Corporation. |
|
3 |
* |
|
4 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
5 |
* |
|
6 |
* This code is free software; you can redistribute it and/or modify it |
|
7 |
* under the terms of the GNU General Public License version 2 only, as |
|
8 |
* published by the Free Software Foundation. |
|
9 |
* |
|
10 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
|
11 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
12 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
13 |
* version 2 for more details (a copy is included in the LICENSE file that |
|
14 |
* accompanied this code). |
|
15 |
* |
|
16 |
* You should have received a copy of the GNU General Public License version |
|
17 |
* 2 along with this work; if not, write to the Free Software Foundation, |
|
18 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
19 |
* |
|
20 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
21 |
* or visit www.oracle.com if you need additional information or have any |
|
22 |
* questions. |
|
23 |
* |
|
24 |
*/ |
|
25 |
||
26 |
#include "precompiled.hpp" |
|
27 |
#include "asm/assembler.hpp" |
|
28 |
#include "asm/assembler.inline.hpp" |
|
29 |
#include "runtime/stubRoutines.hpp" |
|
30 |
#include "macroAssembler_x86.hpp" |
|
31 |
||
32 |
// ofs and limit are used for multi-block byte array. |
|
33 |
// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit) |
|
34 |
void MacroAssembler::fast_sha1(XMMRegister abcd, XMMRegister e0, XMMRegister e1, XMMRegister msg0, |
|
35 |
XMMRegister msg1, XMMRegister msg2, XMMRegister msg3, XMMRegister shuf_mask, |
|
36 |
Register buf, Register state, Register ofs, Register limit, Register rsp, bool multi_block) { |
|
37 |
||
38 |
Label start, done_hash, loop0; |
|
39 |
||
40 |
address upper_word_mask = StubRoutines::x86::upper_word_mask_addr(); |
|
41 |
address shuffle_byte_flip_mask = StubRoutines::x86::shuffle_byte_flip_mask_addr(); |
|
42 |
||
43 |
bind(start); |
|
44 |
movdqu(abcd, Address(state, 0)); |
|
45 |
pinsrd(e0, Address(state, 16), 3); |
|
46 |
movdqu(shuf_mask, ExternalAddress(upper_word_mask)); // 0xFFFFFFFF000000000000000000000000 |
|
47 |
pand(e0, shuf_mask); |
|
48 |
pshufd(abcd, abcd, 0x1B); |
|
49 |
movdqu(shuf_mask, ExternalAddress(shuffle_byte_flip_mask)); //0x000102030405060708090a0b0c0d0e0f |
|
50 |
||
51 |
bind(loop0); |
|
52 |
// Save hash values for addition after rounds |
|
53 |
movdqu(Address(rsp, 0), e0); |
|
54 |
movdqu(Address(rsp, 16), abcd); |
|
55 |
||
56 |
||
57 |
// Rounds 0 - 3 |
|
58 |
movdqu(msg0, Address(buf, 0)); |
|
59 |
pshufb(msg0, shuf_mask); |
|
60 |
paddd(e0, msg0); |
|
61 |
movdqa(e1, abcd); |
|
62 |
sha1rnds4(abcd, e0, 0); |
|
63 |
||
64 |
// Rounds 4 - 7 |
|
65 |
movdqu(msg1, Address(buf, 16)); |
|
66 |
pshufb(msg1, shuf_mask); |
|
67 |
sha1nexte(e1, msg1); |
|
68 |
movdqa(e0, abcd); |
|
69 |
sha1rnds4(abcd, e1, 0); |
|
70 |
sha1msg1(msg0, msg1); |
|
71 |
||
72 |
// Rounds 8 - 11 |
|
73 |
movdqu(msg2, Address(buf, 32)); |
|
74 |
pshufb(msg2, shuf_mask); |
|
75 |
sha1nexte(e0, msg2); |
|
76 |
movdqa(e1, abcd); |
|
77 |
sha1rnds4(abcd, e0, 0); |
|
78 |
sha1msg1(msg1, msg2); |
|
79 |
pxor(msg0, msg2); |
|
80 |
||
81 |
// Rounds 12 - 15 |
|
82 |
movdqu(msg3, Address(buf, 48)); |
|
83 |
pshufb(msg3, shuf_mask); |
|
84 |
sha1nexte(e1, msg3); |
|
85 |
movdqa(e0, abcd); |
|
86 |
sha1msg2(msg0, msg3); |
|
87 |
sha1rnds4(abcd, e1, 0); |
|
88 |
sha1msg1(msg2, msg3); |
|
89 |
pxor(msg1, msg3); |
|
90 |
||
91 |
// Rounds 16 - 19 |
|
92 |
sha1nexte(e0, msg0); |
|
93 |
movdqa(e1, abcd); |
|
94 |
sha1msg2(msg1, msg0); |
|
95 |
sha1rnds4(abcd, e0, 0); |
|
96 |
sha1msg1(msg3, msg0); |
|
97 |
pxor(msg2, msg0); |
|
98 |
||
99 |
// Rounds 20 - 23 |
|
100 |
sha1nexte(e1, msg1); |
|
101 |
movdqa(e0, abcd); |
|
102 |
sha1msg2(msg2, msg1); |
|
103 |
sha1rnds4(abcd, e1, 1); |
|
104 |
sha1msg1(msg0, msg1); |
|
105 |
pxor(msg3, msg1); |
|
106 |
||
107 |
// Rounds 24 - 27 |
|
108 |
sha1nexte(e0, msg2); |
|
109 |
movdqa(e1, abcd); |
|
110 |
sha1msg2(msg3, msg2); |
|
111 |
sha1rnds4(abcd, e0, 1); |
|
112 |
sha1msg1(msg1, msg2); |
|
113 |
pxor(msg0, msg2); |
|
114 |
||
115 |
// Rounds 28 - 31 |
|
116 |
sha1nexte(e1, msg3); |
|
117 |
movdqa(e0, abcd); |
|
118 |
sha1msg2(msg0, msg3); |
|
119 |
sha1rnds4(abcd, e1, 1); |
|
120 |
sha1msg1(msg2, msg3); |
|
121 |
pxor(msg1, msg3); |
|
122 |
||
123 |
// Rounds 32 - 35 |
|
124 |
sha1nexte(e0, msg0); |
|
125 |
movdqa(e1, abcd); |
|
126 |
sha1msg2(msg1, msg0); |
|
127 |
sha1rnds4(abcd, e0, 1); |
|
128 |
sha1msg1(msg3, msg0); |
|
129 |
pxor(msg2, msg0); |
|
130 |
||
131 |
// Rounds 36 - 39 |
|
132 |
sha1nexte(e1, msg1); |
|
133 |
movdqa(e0, abcd); |
|
134 |
sha1msg2(msg2, msg1); |
|
135 |
sha1rnds4(abcd, e1, 1); |
|
136 |
sha1msg1(msg0, msg1); |
|
137 |
pxor(msg3, msg1); |
|
138 |
||
139 |
// Rounds 40 - 43 |
|
140 |
sha1nexte(e0, msg2); |
|
141 |
movdqa(e1, abcd); |
|
142 |
sha1msg2(msg3, msg2); |
|
143 |
sha1rnds4(abcd, e0, 2); |
|
144 |
sha1msg1(msg1, msg2); |
|
145 |
pxor(msg0, msg2); |
|
146 |
||
147 |
// Rounds 44 - 47 |
|
148 |
sha1nexte(e1, msg3); |
|
149 |
movdqa(e0, abcd); |
|
150 |
sha1msg2(msg0, msg3); |
|
151 |
sha1rnds4(abcd, e1, 2); |
|
152 |
sha1msg1(msg2, msg3); |
|
153 |
pxor(msg1, msg3); |
|
154 |
||
155 |
// Rounds 48 - 51 |
|
156 |
sha1nexte(e0, msg0); |
|
157 |
movdqa(e1, abcd); |
|
158 |
sha1msg2(msg1, msg0); |
|
159 |
sha1rnds4(abcd, e0, 2); |
|
160 |
sha1msg1(msg3, msg0); |
|
161 |
pxor(msg2, msg0); |
|
162 |
||
163 |
// Rounds 52 - 55 |
|
164 |
sha1nexte(e1, msg1); |
|
165 |
movdqa(e0, abcd); |
|
166 |
sha1msg2(msg2, msg1); |
|
167 |
sha1rnds4(abcd, e1, 2); |
|
168 |
sha1msg1(msg0, msg1); |
|
169 |
pxor(msg3, msg1); |
|
170 |
||
171 |
// Rounds 56 - 59 |
|
172 |
sha1nexte(e0, msg2); |
|
173 |
movdqa(e1, abcd); |
|
174 |
sha1msg2(msg3, msg2); |
|
175 |
sha1rnds4(abcd, e0, 2); |
|
176 |
sha1msg1(msg1, msg2); |
|
177 |
pxor(msg0, msg2); |
|
178 |
||
179 |
// Rounds 60 - 63 |
|
180 |
sha1nexte(e1, msg3); |
|
181 |
movdqa(e0, abcd); |
|
182 |
sha1msg2(msg0, msg3); |
|
183 |
sha1rnds4(abcd, e1, 3); |
|
184 |
sha1msg1(msg2, msg3); |
|
185 |
pxor(msg1, msg3); |
|
186 |
||
187 |
// Rounds 64 - 67 |
|
188 |
sha1nexte(e0, msg0); |
|
189 |
movdqa(e1, abcd); |
|
190 |
sha1msg2(msg1, msg0); |
|
191 |
sha1rnds4(abcd, e0, 3); |
|
192 |
sha1msg1(msg3, msg0); |
|
193 |
pxor(msg2, msg0); |
|
194 |
||
195 |
// Rounds 68 - 71 |
|
196 |
sha1nexte(e1, msg1); |
|
197 |
movdqa(e0, abcd); |
|
198 |
sha1msg2(msg2, msg1); |
|
199 |
sha1rnds4(abcd, e1, 3); |
|
200 |
pxor(msg3, msg1); |
|
201 |
||
202 |
// Rounds 72 - 75 |
|
203 |
sha1nexte(e0, msg2); |
|
204 |
movdqa(e1, abcd); |
|
205 |
sha1msg2(msg3, msg2); |
|
206 |
sha1rnds4(abcd, e0, 3); |
|
207 |
||
208 |
// Rounds 76 - 79 |
|
209 |
sha1nexte(e1, msg3); |
|
210 |
movdqa(e0, abcd); |
|
211 |
sha1rnds4(abcd, e1, 3); |
|
212 |
||
213 |
// add current hash values with previously saved |
|
214 |
movdqu(msg0, Address(rsp, 0)); |
|
215 |
sha1nexte(e0, msg0); |
|
216 |
movdqu(msg0, Address(rsp, 16)); |
|
217 |
paddd(abcd, msg0); |
|
218 |
||
219 |
if (multi_block) { |
|
220 |
// increment data pointer and loop if more to process |
|
221 |
addptr(buf, 64); |
|
222 |
addptr(ofs, 64); |
|
223 |
cmpptr(ofs, limit); |
|
224 |
jcc(Assembler::belowEqual, loop0); |
|
225 |
movptr(rax, ofs); //return ofs |
|
226 |
} |
|
227 |
// write hash values back in the correct order |
|
228 |
pshufd(abcd, abcd, 0x1b); |
|
229 |
movdqu(Address(state, 0), abcd); |
|
230 |
pextrd(Address(state, 16), e0, 3); |
|
231 |
||
232 |
bind(done_hash); |
|
233 |
||
234 |
} |
|
235 |
||
236 |
// xmm0 (msg) is used as an implicit argument to sh256rnds2 |
|
237 |
// and state0 and state1 can never use xmm0 register. |
|
238 |
// ofs and limit are used for multi-block byte array. |
|
239 |
// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit) |
|
240 |
#ifdef _LP64 |
|
241 |
void MacroAssembler::fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, |
|
242 |
XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, |
|
243 |
Register buf, Register state, Register ofs, Register limit, Register rsp, |
|
244 |
bool multi_block, XMMRegister shuf_mask) { |
|
245 |
#else |
|
246 |
void MacroAssembler::fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, |
|
247 |
XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, |
|
248 |
Register buf, Register state, Register ofs, Register limit, Register rsp, |
|
249 |
bool multi_block) { |
|
250 |
#endif |
|
251 |
Label start, done_hash, loop0; |
|
252 |
||
253 |
address K256 = StubRoutines::x86::k256_addr(); |
|
254 |
address pshuffle_byte_flip_mask = StubRoutines::x86::pshuffle_byte_flip_mask_addr(); |
|
255 |
||
256 |
bind(start); |
|
257 |
movdqu(state0, Address(state, 0)); |
|
258 |
movdqu(state1, Address(state, 16)); |
|
259 |
||
260 |
pshufd(state0, state0, 0xB1); |
|
261 |
pshufd(state1, state1, 0x1B); |
|
262 |
movdqa(msgtmp4, state0); |
|
263 |
palignr(state0, state1, 8); |
|
264 |
pblendw(state1, msgtmp4, 0xF0); |
|
265 |
||
266 |
#ifdef _LP64 |
|
267 |
movdqu(shuf_mask, ExternalAddress(pshuffle_byte_flip_mask)); |
|
268 |
#endif |
|
269 |
lea(rax, ExternalAddress(K256)); |
|
270 |
||
271 |
bind(loop0); |
|
272 |
movdqu(Address(rsp, 0), state0); |
|
273 |
movdqu(Address(rsp, 16), state1); |
|
274 |
||
275 |
// Rounds 0-3 |
|
276 |
movdqu(msg, Address(buf, 0)); |
|
277 |
#ifdef _LP64 |
|
278 |
pshufb(msg, shuf_mask); |
|
279 |
#else |
|
280 |
pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask)); |
|
281 |
#endif |
|
282 |
movdqa(msgtmp0, msg); |
|
283 |
paddd(msg, Address(rax, 0)); |
|
284 |
sha256rnds2(state1, state0); |
|
285 |
pshufd(msg, msg, 0x0E); |
|
286 |
sha256rnds2(state0, state1); |
|
287 |
||
288 |
// Rounds 4-7 |
|
289 |
movdqu(msg, Address(buf, 16)); |
|
290 |
#ifdef _LP64 |
|
291 |
pshufb(msg, shuf_mask); |
|
292 |
#else |
|
293 |
pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask)); |
|
294 |
#endif |
|
295 |
movdqa(msgtmp1, msg); |
|
296 |
paddd(msg, Address(rax, 16)); |
|
297 |
sha256rnds2(state1, state0); |
|
298 |
pshufd(msg, msg, 0x0E); |
|
299 |
sha256rnds2(state0, state1); |
|
300 |
sha256msg1(msgtmp0, msgtmp1); |
|
301 |
||
302 |
// Rounds 8-11 |
|
303 |
movdqu(msg, Address(buf, 32)); |
|
304 |
#ifdef _LP64 |
|
305 |
pshufb(msg, shuf_mask); |
|
306 |
#else |
|
307 |
pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask)); |
|
308 |
#endif |
|
309 |
movdqa(msgtmp2, msg); |
|
310 |
paddd(msg, Address(rax, 32)); |
|
311 |
sha256rnds2(state1, state0); |
|
312 |
pshufd(msg, msg, 0x0E); |
|
313 |
sha256rnds2(state0, state1); |
|
314 |
sha256msg1(msgtmp1, msgtmp2); |
|
315 |
||
316 |
// Rounds 12-15 |
|
317 |
movdqu(msg, Address(buf, 48)); |
|
318 |
#ifdef _LP64 |
|
319 |
pshufb(msg, shuf_mask); |
|
320 |
#else |
|
321 |
pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask)); |
|
322 |
#endif |
|
323 |
movdqa(msgtmp3, msg); |
|
324 |
paddd(msg, Address(rax, 48)); |
|
325 |
sha256rnds2(state1, state0); |
|
326 |
movdqa(msgtmp4, msgtmp3); |
|
327 |
palignr(msgtmp4, msgtmp2, 4); |
|
328 |
paddd(msgtmp0, msgtmp4); |
|
329 |
sha256msg2(msgtmp0, msgtmp3); |
|
330 |
pshufd(msg, msg, 0x0E); |
|
331 |
sha256rnds2(state0, state1); |
|
332 |
sha256msg1(msgtmp2, msgtmp3); |
|
333 |
||
334 |
// Rounds 16-19 |
|
335 |
movdqa(msg, msgtmp0); |
|
336 |
paddd(msg, Address(rax, 64)); |
|
337 |
sha256rnds2(state1, state0); |
|
338 |
movdqa(msgtmp4, msgtmp0); |
|
339 |
palignr(msgtmp4, msgtmp3, 4); |
|
340 |
paddd(msgtmp1, msgtmp4); |
|
341 |
sha256msg2(msgtmp1, msgtmp0); |
|
342 |
pshufd(msg, msg, 0x0E); |
|
343 |
sha256rnds2(state0, state1); |
|
344 |
sha256msg1(msgtmp3, msgtmp0); |
|
345 |
||
346 |
// Rounds 20-23 |
|
347 |
movdqa(msg, msgtmp1); |
|
348 |
paddd(msg, Address(rax, 80)); |
|
349 |
sha256rnds2(state1, state0); |
|
350 |
movdqa(msgtmp4, msgtmp1); |
|
351 |
palignr(msgtmp4, msgtmp0, 4); |
|
352 |
paddd(msgtmp2, msgtmp4); |
|
353 |
sha256msg2(msgtmp2, msgtmp1); |
|
354 |
pshufd(msg, msg, 0x0E); |
|
355 |
sha256rnds2(state0, state1); |
|
356 |
sha256msg1(msgtmp0, msgtmp1); |
|
357 |
||
358 |
// Rounds 24-27 |
|
359 |
movdqa(msg, msgtmp2); |
|
360 |
paddd(msg, Address(rax, 96)); |
|
361 |
sha256rnds2(state1, state0); |
|
362 |
movdqa(msgtmp4, msgtmp2); |
|
363 |
palignr(msgtmp4, msgtmp1, 4); |
|
364 |
paddd(msgtmp3, msgtmp4); |
|
365 |
sha256msg2(msgtmp3, msgtmp2); |
|
366 |
pshufd(msg, msg, 0x0E); |
|
367 |
sha256rnds2(state0, state1); |
|
368 |
sha256msg1(msgtmp1, msgtmp2); |
|
369 |
||
370 |
// Rounds 28-31 |
|
371 |
movdqa(msg, msgtmp3); |
|
372 |
paddd(msg, Address(rax, 112)); |
|
373 |
sha256rnds2(state1, state0); |
|
374 |
movdqa(msgtmp4, msgtmp3); |
|
375 |
palignr(msgtmp4, msgtmp2, 4); |
|
376 |
paddd(msgtmp0, msgtmp4); |
|
377 |
sha256msg2(msgtmp0, msgtmp3); |
|
378 |
pshufd(msg, msg, 0x0E); |
|
379 |
sha256rnds2(state0, state1); |
|
380 |
sha256msg1(msgtmp2, msgtmp3); |
|
381 |
||
382 |
// Rounds 32-35 |
|
383 |
movdqa(msg, msgtmp0); |
|
384 |
paddd(msg, Address(rax, 128)); |
|
385 |
sha256rnds2(state1, state0); |
|
386 |
movdqa(msgtmp4, msgtmp0); |
|
387 |
palignr(msgtmp4, msgtmp3, 4); |
|
388 |
paddd(msgtmp1, msgtmp4); |
|
389 |
sha256msg2(msgtmp1, msgtmp0); |
|
390 |
pshufd(msg, msg, 0x0E); |
|
391 |
sha256rnds2(state0, state1); |
|
392 |
sha256msg1(msgtmp3, msgtmp0); |
|
393 |
||
394 |
// Rounds 36-39 |
|
395 |
movdqa(msg, msgtmp1); |
|
396 |
paddd(msg, Address(rax, 144)); |
|
397 |
sha256rnds2(state1, state0); |
|
398 |
movdqa(msgtmp4, msgtmp1); |
|
399 |
palignr(msgtmp4, msgtmp0, 4); |
|
400 |
paddd(msgtmp2, msgtmp4); |
|
401 |
sha256msg2(msgtmp2, msgtmp1); |
|
402 |
pshufd(msg, msg, 0x0E); |
|
403 |
sha256rnds2(state0, state1); |
|
404 |
sha256msg1(msgtmp0, msgtmp1); |
|
405 |
||
406 |
// Rounds 40-43 |
|
407 |
movdqa(msg, msgtmp2); |
|
408 |
paddd(msg, Address(rax, 160)); |
|
409 |
sha256rnds2(state1, state0); |
|
410 |
movdqa(msgtmp4, msgtmp2); |
|
411 |
palignr(msgtmp4, msgtmp1, 4); |
|
412 |
paddd(msgtmp3, msgtmp4); |
|
413 |
sha256msg2(msgtmp3, msgtmp2); |
|
414 |
pshufd(msg, msg, 0x0E); |
|
415 |
sha256rnds2(state0, state1); |
|
416 |
sha256msg1(msgtmp1, msgtmp2); |
|
417 |
||
418 |
// Rounds 44-47 |
|
419 |
movdqa(msg, msgtmp3); |
|
420 |
paddd(msg, Address(rax, 176)); |
|
421 |
sha256rnds2(state1, state0); |
|
422 |
movdqa(msgtmp4, msgtmp3); |
|
423 |
palignr(msgtmp4, msgtmp2, 4); |
|
424 |
paddd(msgtmp0, msgtmp4); |
|
425 |
sha256msg2(msgtmp0, msgtmp3); |
|
426 |
pshufd(msg, msg, 0x0E); |
|
427 |
sha256rnds2(state0, state1); |
|
428 |
sha256msg1(msgtmp2, msgtmp3); |
|
429 |
||
430 |
// Rounds 48-51 |
|
431 |
movdqa(msg, msgtmp0); |
|
432 |
paddd(msg, Address(rax, 192)); |
|
433 |
sha256rnds2(state1, state0); |
|
434 |
movdqa(msgtmp4, msgtmp0); |
|
435 |
palignr(msgtmp4, msgtmp3, 4); |
|
436 |
paddd(msgtmp1, msgtmp4); |
|
437 |
sha256msg2(msgtmp1, msgtmp0); |
|
438 |
pshufd(msg, msg, 0x0E); |
|
439 |
sha256rnds2(state0, state1); |
|
440 |
sha256msg1(msgtmp3, msgtmp0); |
|
441 |
||
442 |
// Rounds 52-55 |
|
443 |
movdqa(msg, msgtmp1); |
|
444 |
paddd(msg, Address(rax, 208)); |
|
445 |
sha256rnds2(state1, state0); |
|
446 |
movdqa(msgtmp4, msgtmp1); |
|
447 |
palignr(msgtmp4, msgtmp0, 4); |
|
448 |
paddd(msgtmp2, msgtmp4); |
|
449 |
sha256msg2(msgtmp2, msgtmp1); |
|
450 |
pshufd(msg, msg, 0x0E); |
|
451 |
sha256rnds2(state0, state1); |
|
452 |
||
453 |
// Rounds 56-59 |
|
454 |
movdqa(msg, msgtmp2); |
|
455 |
paddd(msg, Address(rax, 224)); |
|
456 |
sha256rnds2(state1, state0); |
|
457 |
movdqa(msgtmp4, msgtmp2); |
|
458 |
palignr(msgtmp4, msgtmp1, 4); |
|
459 |
paddd(msgtmp3, msgtmp4); |
|
460 |
sha256msg2(msgtmp3, msgtmp2); |
|
461 |
pshufd(msg, msg, 0x0E); |
|
462 |
sha256rnds2(state0, state1); |
|
463 |
||
464 |
// Rounds 60-63 |
|
465 |
movdqa(msg, msgtmp3); |
|
466 |
paddd(msg, Address(rax, 240)); |
|
467 |
sha256rnds2(state1, state0); |
|
468 |
pshufd(msg, msg, 0x0E); |
|
469 |
sha256rnds2(state0, state1); |
|
470 |
movdqu(msg, Address(rsp, 0)); |
|
471 |
paddd(state0, msg); |
|
472 |
movdqu(msg, Address(rsp, 16)); |
|
473 |
paddd(state1, msg); |
|
474 |
||
475 |
if (multi_block) { |
|
476 |
// increment data pointer and loop if more to process |
|
477 |
addptr(buf, 64); |
|
478 |
addptr(ofs, 64); |
|
479 |
cmpptr(ofs, limit); |
|
480 |
jcc(Assembler::belowEqual, loop0); |
|
481 |
movptr(rax, ofs); //return ofs |
|
482 |
} |
|
483 |
||
484 |
pshufd(state0, state0, 0x1B); |
|
485 |
pshufd(state1, state1, 0xB1); |
|
486 |
movdqa(msgtmp4, state0); |
|
487 |
pblendw(state0, state1, 0xF0); |
|
488 |
palignr(state1, msgtmp4, 8); |
|
489 |
||
490 |
movdqu(Address(state, 0), state0); |
|
491 |
movdqu(Address(state, 16), state1); |
|
492 |
||
493 |
bind(done_hash); |
|
494 |
||
495 |
} |
|
38135
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
496 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
497 |
#ifdef _LP64 |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
498 |
/* |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
499 |
The algorithm below is based on Intel publication: |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
500 |
"Fast SHA-256 Implementations on Intel(R) Architecture Processors" by Jim Guilford, Kirk Yap and Vinodh Gopal. |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
501 |
The assembly code was originally provided by Sean Gulley and in many places preserves |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
502 |
the original assembly NAMES and comments to simplify matching Java assembly with its original. |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
503 |
The Java version was substantially redesigned to replace 1200 assembly instructions with |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
504 |
much shorter run-time generator of the same code in memory. |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
505 |
*/ |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
506 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
507 |
void MacroAssembler::sha256_AVX2_one_round_compute( |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
508 |
Register reg_old_h, |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
509 |
Register reg_a, |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
510 |
Register reg_b, |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
511 |
Register reg_c, |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
512 |
Register reg_d, |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
513 |
Register reg_e, |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
514 |
Register reg_f, |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
515 |
Register reg_g, |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
516 |
Register reg_h, |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
517 |
int iter) { |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
518 |
const Register& reg_y0 = r13; |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
519 |
const Register& reg_y1 = r14; |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
520 |
const Register& reg_y2 = r15; |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
521 |
const Register& reg_y3 = rcx; |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
522 |
const Register& reg_T1 = r12; |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
523 |
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND iter ;;;;;;;;;;;;;;;;;;;;;;;;;;; |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
524 |
if (iter%4 > 0) { |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
525 |
addl(reg_old_h, reg_y2); // reg_h = k + w + reg_h + S0 + S1 + CH = t1 + S0; -- |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
526 |
} |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
527 |
movl(reg_y2, reg_f); // reg_y2 = reg_f ; CH |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
528 |
rorxd(reg_y0, reg_e, 25); // reg_y0 = reg_e >> 25 ; S1A |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
529 |
rorxd(reg_y1, reg_e, 11); // reg_y1 = reg_e >> 11 ; S1B |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
530 |
xorl(reg_y2, reg_g); // reg_y2 = reg_f^reg_g ; CH |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
531 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
532 |
xorl(reg_y0, reg_y1); // reg_y0 = (reg_e>>25) ^ (reg_h>>11) ; S1 |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
533 |
rorxd(reg_y1, reg_e, 6); // reg_y1 = (reg_e >> 6) ; S1 |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
534 |
andl(reg_y2, reg_e); // reg_y2 = (reg_f^reg_g)®_e ; CH |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
535 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
536 |
if (iter%4 > 0) { |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
537 |
addl(reg_old_h, reg_y3); // reg_h = t1 + S0 + MAJ ; -- |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
538 |
} |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
539 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
540 |
xorl(reg_y0, reg_y1); // reg_y0 = (reg_e>>25) ^ (reg_e>>11) ^ (reg_e>>6) ; S1 |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
541 |
rorxd(reg_T1, reg_a, 13); // reg_T1 = reg_a >> 13 ; S0B |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
542 |
xorl(reg_y2, reg_g); // reg_y2 = CH = ((reg_f^reg_g)®_e)^reg_g ; CH |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
543 |
rorxd(reg_y1, reg_a, 22); // reg_y1 = reg_a >> 22 ; S0A |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
544 |
movl(reg_y3, reg_a); // reg_y3 = reg_a ; MAJA |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
545 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
546 |
xorl(reg_y1, reg_T1); // reg_y1 = (reg_a>>22) ^ (reg_a>>13) ; S0 |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
547 |
rorxd(reg_T1, reg_a, 2); // reg_T1 = (reg_a >> 2) ; S0 |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
548 |
addl(reg_h, Address(rsp, rdx, Address::times_1, 4*iter)); // reg_h = k + w + reg_h ; -- |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
549 |
orl(reg_y3, reg_c); // reg_y3 = reg_a|reg_c ; MAJA |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
550 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
551 |
xorl(reg_y1, reg_T1); // reg_y1 = (reg_a>>22) ^ (reg_a>>13) ^ (reg_a>>2) ; S0 |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
552 |
movl(reg_T1, reg_a); // reg_T1 = reg_a ; MAJB |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
553 |
andl(reg_y3, reg_b); // reg_y3 = (reg_a|reg_c)®_b ; MAJA |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
554 |
andl(reg_T1, reg_c); // reg_T1 = reg_a®_c ; MAJB |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
555 |
addl(reg_y2, reg_y0); // reg_y2 = S1 + CH ; -- |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
556 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
557 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
558 |
addl(reg_d, reg_h); // reg_d = k + w + reg_h + reg_d ; -- |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
559 |
orl(reg_y3, reg_T1); // reg_y3 = MAJ = (reg_a|reg_c)®_b)|(reg_a®_c) ; MAJ |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
560 |
addl(reg_h, reg_y1); // reg_h = k + w + reg_h + S0 ; -- |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
561 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
562 |
addl(reg_d, reg_y2); // reg_d = k + w + reg_h + reg_d + S1 + CH = reg_d + t1 ; -- |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
563 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
564 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
565 |
if (iter%4 == 3) { |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
566 |
addl(reg_h, reg_y2); // reg_h = k + w + reg_h + S0 + S1 + CH = t1 + S0; -- |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
567 |
addl(reg_h, reg_y3); // reg_h = t1 + S0 + MAJ ; -- |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
568 |
} |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
569 |
} |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
570 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
571 |
void MacroAssembler::sha256_AVX2_four_rounds_compute_first(int start) { |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
572 |
sha256_AVX2_one_round_compute(rax, rax, rbx, rdi, rsi, r8, r9, r10, r11, start + 0); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
573 |
sha256_AVX2_one_round_compute(r11, r11, rax, rbx, rdi, rsi, r8, r9, r10, start + 1); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
574 |
sha256_AVX2_one_round_compute(r10, r10, r11, rax, rbx, rdi, rsi, r8, r9, start + 2); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
575 |
sha256_AVX2_one_round_compute(r9, r9, r10, r11, rax, rbx, rdi, rsi, r8, start + 3); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
576 |
} |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
577 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
578 |
void MacroAssembler::sha256_AVX2_four_rounds_compute_last(int start) { |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
579 |
sha256_AVX2_one_round_compute(r8, r8, r9, r10, r11, rax, rbx, rdi, rsi, start + 0); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
580 |
sha256_AVX2_one_round_compute(rsi, rsi, r8, r9, r10, r11, rax, rbx, rdi, start + 1); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
581 |
sha256_AVX2_one_round_compute(rdi, rdi, rsi, r8, r9, r10, r11, rax, rbx, start + 2); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
582 |
sha256_AVX2_one_round_compute(rbx, rbx, rdi, rsi, r8, r9, r10, r11, rax, start + 3); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
583 |
} |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
584 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
585 |
void MacroAssembler::sha256_AVX2_one_round_and_sched( |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
586 |
XMMRegister xmm_0, /* == ymm4 on 0, 1, 2, 3 iterations, then rotate 4 registers left on 4, 8, 12 iterations */ |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
587 |
XMMRegister xmm_1, /* ymm5 */ /* full cycle is 16 iterations */ |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
588 |
XMMRegister xmm_2, /* ymm6 */ |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
589 |
XMMRegister xmm_3, /* ymm7 */ |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
590 |
Register reg_a, /* == rax on 0 iteration, then rotate 8 register right on each next iteration */ |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
591 |
Register reg_b, /* rbx */ /* full cycle is 8 iterations */ |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
592 |
Register reg_c, /* rdi */ |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
593 |
Register reg_d, /* rsi */ |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
594 |
Register reg_e, /* r8 */ |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
595 |
Register reg_f, /* r9d */ |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
596 |
Register reg_g, /* r10d */ |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
597 |
Register reg_h, /* r11d */ |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
598 |
int iter) |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
599 |
{ |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
600 |
movl(rcx, reg_a); // rcx = reg_a ; MAJA |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
601 |
rorxd(r13, reg_e, 25); // r13 = reg_e >> 25 ; S1A |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
602 |
rorxd(r14, reg_e, 11); // r14 = reg_e >> 11 ; S1B |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
603 |
addl(reg_h, Address(rsp, rdx, Address::times_1, 4*iter)); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
604 |
orl(rcx, reg_c); // rcx = reg_a|reg_c ; MAJA |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
605 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
606 |
movl(r15, reg_f); // r15 = reg_f ; CH |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
607 |
rorxd(r12, reg_a, 13); // r12 = reg_a >> 13 ; S0B |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
608 |
xorl(r13, r14); // r13 = (reg_e>>25) ^ (reg_e>>11) ; S1 |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
609 |
xorl(r15, reg_g); // r15 = reg_f^reg_g ; CH |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
610 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
611 |
rorxd(r14, reg_e, 6); // r14 = (reg_e >> 6) ; S1 |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
612 |
andl(r15, reg_e); // r15 = (reg_f^reg_g)®_e ; CH |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
613 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
614 |
xorl(r13, r14); // r13 = (reg_e>>25) ^ (reg_e>>11) ^ (reg_e>>6) ; S1 |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
615 |
rorxd(r14, reg_a, 22); // r14 = reg_a >> 22 ; S0A |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
616 |
addl(reg_d, reg_h); // reg_d = k + w + reg_h + reg_d ; -- |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
617 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
618 |
andl(rcx, reg_b); // rcx = (reg_a|reg_c)®_b ; MAJA |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
619 |
xorl(r14, r12); // r14 = (reg_a>>22) ^ (reg_a>>13) ; S0 |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
620 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
621 |
rorxd(r12, reg_a, 2); // r12 = (reg_a >> 2) ; S0 |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
622 |
xorl(r15, reg_g); // r15 = CH = ((reg_f^reg_g)®_e)^reg_g ; CH |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
623 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
624 |
xorl(r14, r12); // r14 = (reg_a>>22) ^ (reg_a>>13) ^ (reg_a>>2) ; S0 |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
625 |
movl(r12, reg_a); // r12 = reg_a ; MAJB |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
626 |
andl(r12, reg_c); // r12 = reg_a®_c ; MAJB |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
627 |
addl(r15, r13); // r15 = S1 + CH ; -- |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
628 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
629 |
orl(rcx, r12); // rcx = MAJ = (reg_a|reg_c)®_b)|(reg_a®_c) ; MAJ |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
630 |
addl(reg_h, r14); // reg_h = k + w + reg_h + S0 ; -- |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
631 |
addl(reg_d, r15); // reg_d = k + w + reg_h + reg_d + S1 + CH = reg_d + t1 ; -- |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
632 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
633 |
addl(reg_h, r15); // reg_h = k + w + reg_h + S0 + S1 + CH = t1 + S0; -- |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
634 |
addl(reg_h, rcx); // reg_h = t1 + S0 + MAJ ; -- |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
635 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
636 |
if (iter%4 == 0) { |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
637 |
vpalignr(xmm0, xmm_3, xmm_2, 4, AVX_256bit); // ymm0 = W[-7] |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
638 |
vpaddd(xmm0, xmm0, xmm_0, AVX_256bit); // ymm0 = W[-7] + W[-16]; y1 = (e >> 6) ; S1 |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
639 |
vpalignr(xmm1, xmm_1, xmm_0, 4, AVX_256bit); // ymm1 = W[-15] |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
640 |
vpsrld(xmm2, xmm1, 7, AVX_256bit); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
641 |
vpslld(xmm3, xmm1, 32-7, AVX_256bit); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
642 |
vpor(xmm3, xmm3, xmm2, AVX_256bit); // ymm3 = W[-15] ror 7 |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
643 |
vpsrld(xmm2, xmm1,18, AVX_256bit); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
644 |
} else if (iter%4 == 1 ) { |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
645 |
vpsrld(xmm8, xmm1, 3, AVX_256bit); // ymm8 = W[-15] >> 3 |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
646 |
vpslld(xmm1, xmm1, 32-18, AVX_256bit); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
647 |
vpxor(xmm3, xmm3, xmm1, AVX_256bit); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
648 |
vpxor(xmm3, xmm3, xmm2, AVX_256bit); // ymm3 = W[-15] ror 7 ^ W[-15] ror 18 |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
649 |
vpxor(xmm1, xmm3, xmm8, AVX_256bit); // ymm1 = s0 |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
650 |
vpshufd(xmm2, xmm_3, 0xFA, AVX_256bit); // 11111010b ; ymm2 = W[-2] {BBAA} |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
651 |
vpaddd(xmm0, xmm0, xmm1, AVX_256bit); // ymm0 = W[-16] + W[-7] + s0 |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
652 |
vpsrld(xmm8, xmm2, 10, AVX_256bit); // ymm8 = W[-2] >> 10 {BBAA} |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
653 |
} else if (iter%4 == 2) { |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
654 |
vpsrlq(xmm3, xmm2, 19, AVX_256bit); // ymm3 = W[-2] ror 19 {xBxA} |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
655 |
vpsrlq(xmm2, xmm2, 17, AVX_256bit); // ymm2 = W[-2] ror 17 {xBxA} |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
656 |
vpxor(xmm2, xmm2, xmm3, AVX_256bit); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
657 |
vpxor(xmm8, xmm8, xmm2, AVX_256bit); // ymm8 = s1 {xBxA} |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
658 |
vpshufb(xmm8, xmm8, xmm10, AVX_256bit); // ymm8 = s1 {00BA} |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
659 |
vpaddd(xmm0, xmm0, xmm8, AVX_256bit); // ymm0 = {..., ..., W[1], W[0]} |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
660 |
vpshufd(xmm2, xmm0, 0x50, AVX_256bit); // 01010000b ; ymm2 = W[-2] {DDCC} |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
661 |
} else if (iter%4 == 3) { |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
662 |
vpsrld(xmm11, xmm2, 10, AVX_256bit); // ymm11 = W[-2] >> 10 {DDCC} |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
663 |
vpsrlq(xmm3, xmm2, 19, AVX_256bit); // ymm3 = W[-2] ror 19 {xDxC} |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
664 |
vpsrlq(xmm2, xmm2, 17, AVX_256bit); // ymm2 = W[-2] ror 17 {xDxC} |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
665 |
vpxor(xmm2, xmm2, xmm3, AVX_256bit); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
666 |
vpxor(xmm11, xmm11, xmm2, AVX_256bit); // ymm11 = s1 {xDxC} |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
667 |
vpshufb(xmm11, xmm11, xmm12, AVX_256bit); // ymm11 = s1 {DC00} |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
668 |
vpaddd(xmm_0, xmm11, xmm0, AVX_256bit); // xmm_0 = {W[3], W[2], W[1], W[0]} |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
669 |
} |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
670 |
} |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
671 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
672 |
void MacroAssembler::addm(int disp, Register r1, Register r2) { |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
673 |
addl(r2, Address(r1, disp)); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
674 |
movl(Address(r1, disp), r2); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
675 |
} |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
676 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
677 |
void MacroAssembler::sha256_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
678 |
XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
679 |
Register buf, Register state, Register ofs, Register limit, Register rsp, |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
680 |
bool multi_block, XMMRegister shuf_mask) { |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
681 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
682 |
Label loop0, loop1, loop2, loop3, |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
683 |
last_block_enter, do_last_block, only_one_block, done_hash, |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
684 |
compute_size, compute_size_end, |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
685 |
compute_size1, compute_size_end1; |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
686 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
687 |
address K256_W = StubRoutines::x86::k256_W_addr(); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
688 |
address pshuffle_byte_flip_mask = StubRoutines::x86::pshuffle_byte_flip_mask_addr(); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
689 |
address pshuffle_byte_flip_mask_addr = 0; |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
690 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
691 |
const XMMRegister& SHUF_00BA = xmm10; // ymm10: shuffle xBxA -> 00BA |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
692 |
const XMMRegister& SHUF_DC00 = xmm12; // ymm12: shuffle xDxC -> DC00 |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
693 |
const XMMRegister& BYTE_FLIP_MASK = xmm13; // ymm13 |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
694 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
695 |
const XMMRegister& X_BYTE_FLIP_MASK = xmm13; //XMM version of BYTE_FLIP_MASK |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
696 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
697 |
const Register& NUM_BLKS = r8; // 3rd arg |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
698 |
const Register& CTX = rdx; // 2nd arg |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
699 |
const Register& INP = rcx; // 1st arg |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
700 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
701 |
const Register& c = rdi; |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
702 |
const Register& d = rsi; |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
703 |
const Register& e = r8; // clobbers NUM_BLKS |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
704 |
const Register& y3 = rcx; // clobbers INP |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
705 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
706 |
const Register& TBL = rbp; |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
707 |
const Register& SRND = CTX; // SRND is same register as CTX |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
708 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
709 |
const Register& a = rax; |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
710 |
const Register& b = rbx; |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
711 |
const Register& f = r9; |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
712 |
const Register& g = r10; |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
713 |
const Register& h = r11; |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
714 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
715 |
const Register& T1 = r12; |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
716 |
const Register& y0 = r13; |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
717 |
const Register& y1 = r14; |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
718 |
const Register& y2 = r15; |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
719 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
720 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
721 |
enum { |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
722 |
_XFER_SIZE = 2*64*4, // 2 blocks, 64 rounds, 4 bytes/round |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
723 |
_INP_END_SIZE = 8, |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
724 |
_INP_SIZE = 8, |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
725 |
_CTX_SIZE = 8, |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
726 |
_RSP_SIZE = 8, |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
727 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
728 |
_XFER = 0, |
41333
ce08d64b41c7
8078122: YMM registers upper 128 bits may get clobbered by a JNI call on windows
kvn
parents:
38135
diff
changeset
|
729 |
_INP_END = _XFER + _XFER_SIZE, |
38135
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
730 |
_INP = _INP_END + _INP_END_SIZE, |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
731 |
_CTX = _INP + _INP_SIZE, |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
732 |
_RSP = _CTX + _CTX_SIZE, |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
733 |
STACK_SIZE = _RSP + _RSP_SIZE |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
734 |
}; |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
735 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
736 |
#ifndef _WIN64 |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
737 |
push(rcx); // linux: this is limit, need at the end |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
738 |
push(rdx); // linux: this is ofs |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
739 |
#else |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
740 |
push(r8); // win64: this is ofs |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
741 |
push(r9); // win64: this is limit, we need them again at the very end
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
742 |
#endif |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
743 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
744 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
745 |
push(rbx); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
746 |
#ifdef _WIN64 |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
747 |
push(rsi); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
748 |
push(rdi); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
749 |
#endif |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
750 |
push(rbp); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
751 |
push(r12); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
752 |
push(r13); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
753 |
push(r14); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
754 |
push(r15); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
755 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
756 |
movq(rax, rsp); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
757 |
subq(rsp, STACK_SIZE); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
758 |
andq(rsp, -32); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
759 |
movq(Address(rsp, _RSP), rax); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
760 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
761 |
#ifndef _WIN64 |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
762 |
// copy linux params to win64 params, therefore the rest of code will be the same for both |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
763 |
movq(r9, rcx); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
764 |
movq(r8, rdx); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
765 |
movq(rdx, rsi); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
766 |
movq(rcx, rdi); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
767 |
#endif |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
768 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
769 |
// setting original assembly ABI |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
770 |
/** message to encrypt in INP */ |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
771 |
lea(INP, Address(rcx, 0)); // rcx == message (buf) ;; linux: INP = buf = rdi |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
772 |
/** digest in CTX */ |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
773 |
movq(CTX, rdx); // rdx = digest (state) ;; linux: CTX = state = rsi |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
774 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
775 |
/** NUM_BLK is the length of message, need to set it from ofs and limit */ |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
776 |
if (multi_block) { |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
777 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
778 |
// Win64: cannot directly update NUM_BLKS, since NUM_BLKS = ofs = r8 |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
779 |
// on entry r8 = ofs |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
780 |
// on exit r8 = NUM_BLKS |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
781 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
782 |
xorq(rax, rax); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
783 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
784 |
bind(compute_size); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
785 |
cmpptr(r8, r9); // assume the original ofs <= limit ;; linux: cmp rcx, rdx |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
786 |
jccb(Assembler::aboveEqual, compute_size_end); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
787 |
addq(r8, 64); //;; linux: ofs = rdx |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
788 |
addq(rax, 64); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
789 |
jmpb(compute_size); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
790 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
791 |
bind(compute_size_end); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
792 |
movq(NUM_BLKS, rax); // NUM_BLK (r8) ;; linux: NUM_BLK = rdx |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
793 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
794 |
cmpq(NUM_BLKS, 0); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
795 |
jcc(Assembler::equal, done_hash); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
796 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
797 |
} else { |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
798 |
xorq(NUM_BLKS, NUM_BLKS); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
799 |
addq(NUM_BLKS, 64); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
800 |
}//if (!multi_block) |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
801 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
802 |
lea(NUM_BLKS, Address(INP, NUM_BLKS, Address::times_1, -64)); // pointer to the last block |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
803 |
movq(Address(rsp, _INP_END), NUM_BLKS); // |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
804 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
805 |
cmpptr(INP, NUM_BLKS); //cmp INP, NUM_BLKS |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
806 |
jcc(Assembler::equal, only_one_block); //je only_one_block |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
807 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
808 |
// load initial digest |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
809 |
movl(a, Address(CTX, 4*0)); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
810 |
movl(b, Address(CTX, 4*1)); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
811 |
movl(c, Address(CTX, 4*2)); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
812 |
movl(d, Address(CTX, 4*3)); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
813 |
movl(e, Address(CTX, 4*4)); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
814 |
movl(f, Address(CTX, 4*5)); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
815 |
movl(g, Address(CTX, 4*6)); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
816 |
movl(h, Address(CTX, 4*7)); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
817 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
818 |
pshuffle_byte_flip_mask_addr = pshuffle_byte_flip_mask; |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
819 |
vmovdqu(BYTE_FLIP_MASK, ExternalAddress(pshuffle_byte_flip_mask_addr +0)); //[PSHUFFLE_BYTE_FLIP_MASK wrt rip] |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
820 |
vmovdqu(SHUF_00BA, ExternalAddress(pshuffle_byte_flip_mask_addr + 32)); //[_SHUF_00BA wrt rip] |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
821 |
vmovdqu(SHUF_DC00, ExternalAddress(pshuffle_byte_flip_mask_addr + 64)); //[_SHUF_DC00 wrt rip] |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
822 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
823 |
movq(Address(rsp, _CTX), CTX); // store |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
824 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
825 |
bind(loop0); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
826 |
lea(TBL, ExternalAddress(K256_W)); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
827 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
828 |
// assume buffers not aligned |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
829 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
830 |
// Load first 16 dwords from two blocks |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
831 |
vmovdqu(xmm0, Address(INP, 0*32)); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
832 |
vmovdqu(xmm1, Address(INP, 1*32)); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
833 |
vmovdqu(xmm2, Address(INP, 2*32)); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
834 |
vmovdqu(xmm3, Address(INP, 3*32)); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
835 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
836 |
// byte swap data |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
837 |
vpshufb(xmm0, xmm0, BYTE_FLIP_MASK, AVX_256bit); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
838 |
vpshufb(xmm1, xmm1, BYTE_FLIP_MASK, AVX_256bit); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
839 |
vpshufb(xmm2, xmm2, BYTE_FLIP_MASK, AVX_256bit); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
840 |
vpshufb(xmm3, xmm3, BYTE_FLIP_MASK, AVX_256bit); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
841 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
842 |
// transpose data into high/low halves |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
843 |
vperm2i128(xmm4, xmm0, xmm2, 0x20); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
844 |
vperm2i128(xmm5, xmm0, xmm2, 0x31); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
845 |
vperm2i128(xmm6, xmm1, xmm3, 0x20); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
846 |
vperm2i128(xmm7, xmm1, xmm3, 0x31); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
847 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
848 |
bind(last_block_enter); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
849 |
addq(INP, 64); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
850 |
movq(Address(rsp, _INP), INP); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
851 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
852 |
//;; schedule 48 input dwords, by doing 3 passes of 16 rounds each |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
853 |
xorq(SRND, SRND); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
854 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
855 |
align(16); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
856 |
bind(loop1); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
857 |
vpaddd(xmm9, xmm4, Address(TBL, SRND, Address::times_1, 0*32), AVX_256bit); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
858 |
vmovdqu(Address(rsp, SRND, Address::times_1, _XFER + 0*32), xmm9); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
859 |
sha256_AVX2_one_round_and_sched(xmm4, xmm5, xmm6, xmm7, rax, rbx, rdi, rsi, r8, r9, r10, r11, 0); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
860 |
sha256_AVX2_one_round_and_sched(xmm4, xmm5, xmm6, xmm7, r11, rax, rbx, rdi, rsi, r8, r9, r10, 1); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
861 |
sha256_AVX2_one_round_and_sched(xmm4, xmm5, xmm6, xmm7, r10, r11, rax, rbx, rdi, rsi, r8, r9, 2); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
862 |
sha256_AVX2_one_round_and_sched(xmm4, xmm5, xmm6, xmm7, r9, r10, r11, rax, rbx, rdi, rsi, r8, 3); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
863 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
864 |
vpaddd(xmm9, xmm5, Address(TBL, SRND, Address::times_1, 1*32), AVX_256bit); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
865 |
vmovdqu(Address(rsp, SRND, Address::times_1, _XFER + 1*32), xmm9); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
866 |
sha256_AVX2_one_round_and_sched(xmm5, xmm6, xmm7, xmm4, r8, r9, r10, r11, rax, rbx, rdi, rsi, 8+0); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
867 |
sha256_AVX2_one_round_and_sched(xmm5, xmm6, xmm7, xmm4, rsi, r8, r9, r10, r11, rax, rbx, rdi, 8+1); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
868 |
sha256_AVX2_one_round_and_sched(xmm5, xmm6, xmm7, xmm4, rdi, rsi, r8, r9, r10, r11, rax, rbx, 8+2); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
869 |
sha256_AVX2_one_round_and_sched(xmm5, xmm6, xmm7, xmm4, rbx, rdi, rsi, r8, r9, r10, r11, rax, 8+3); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
870 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
871 |
vpaddd(xmm9, xmm6, Address(TBL, SRND, Address::times_1, 2*32), AVX_256bit); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
872 |
vmovdqu(Address(rsp, SRND, Address::times_1, _XFER + 2*32), xmm9); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
873 |
sha256_AVX2_one_round_and_sched(xmm6, xmm7, xmm4, xmm5, rax, rbx, rdi, rsi, r8, r9, r10, r11, 16+0); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
874 |
sha256_AVX2_one_round_and_sched(xmm6, xmm7, xmm4, xmm5, r11, rax, rbx, rdi, rsi, r8, r9, r10, 16+1); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
875 |
sha256_AVX2_one_round_and_sched(xmm6, xmm7, xmm4, xmm5, r10, r11, rax, rbx, rdi, rsi, r8, r9, 16+2); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
876 |
sha256_AVX2_one_round_and_sched(xmm6, xmm7, xmm4, xmm5, r9, r10, r11, rax, rbx, rdi, rsi, r8, 16+3); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
877 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
878 |
vpaddd(xmm9, xmm7, Address(TBL, SRND, Address::times_1, 3*32), AVX_256bit); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
879 |
vmovdqu(Address(rsp, SRND, Address::times_1, _XFER + 3*32), xmm9); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
880 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
881 |
sha256_AVX2_one_round_and_sched(xmm7, xmm4, xmm5, xmm6, r8, r9, r10, r11, rax, rbx, rdi, rsi, 24+0); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
882 |
sha256_AVX2_one_round_and_sched(xmm7, xmm4, xmm5, xmm6, rsi, r8, r9, r10, r11, rax, rbx, rdi, 24+1); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
883 |
sha256_AVX2_one_round_and_sched(xmm7, xmm4, xmm5, xmm6, rdi, rsi, r8, r9, r10, r11, rax, rbx, 24+2); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
884 |
sha256_AVX2_one_round_and_sched(xmm7, xmm4, xmm5, xmm6, rbx, rdi, rsi, r8, r9, r10, r11, rax, 24+3); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
885 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
886 |
addq(SRND, 4*32); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
887 |
cmpq(SRND, 3 * 4*32); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
888 |
jcc(Assembler::below, loop1); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
889 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
890 |
bind(loop2); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
891 |
// Do last 16 rounds with no scheduling |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
892 |
vpaddd(xmm9, xmm4, Address(TBL, SRND, Address::times_1, 0*32), AVX_256bit); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
893 |
vmovdqu(Address(rsp, SRND, Address::times_1, _XFER + 0*32), xmm9); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
894 |
sha256_AVX2_four_rounds_compute_first(0); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
895 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
896 |
vpaddd(xmm9, xmm5, Address(TBL, SRND, Address::times_1, 1*32), AVX_256bit); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
897 |
vmovdqu(Address(rsp, SRND, Address::times_1, _XFER + 1*32), xmm9); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
898 |
sha256_AVX2_four_rounds_compute_last(0 + 8); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
899 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
900 |
addq(SRND, 2*32); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
901 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
902 |
vmovdqu(xmm4, xmm6); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
903 |
vmovdqu(xmm5, xmm7); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
904 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
905 |
cmpq(SRND, 4 * 4*32); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
906 |
jcc(Assembler::below, loop2); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
907 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
908 |
movq(CTX, Address(rsp, _CTX)); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
909 |
movq(INP, Address(rsp, _INP)); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
910 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
911 |
addm(4*0, CTX, a); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
912 |
addm(4*1, CTX, b); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
913 |
addm(4*2, CTX, c); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
914 |
addm(4*3, CTX, d); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
915 |
addm(4*4, CTX, e); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
916 |
addm(4*5, CTX, f); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
917 |
addm(4*6, CTX, g); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
918 |
addm(4*7, CTX, h); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
919 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
920 |
cmpq(INP, Address(rsp, _INP_END)); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
921 |
jcc(Assembler::above, done_hash); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
922 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
923 |
//Do second block using previously scheduled results |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
924 |
xorq(SRND, SRND); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
925 |
align(16); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
926 |
bind(loop3); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
927 |
sha256_AVX2_four_rounds_compute_first(4); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
928 |
sha256_AVX2_four_rounds_compute_last(4+8); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
929 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
930 |
addq(SRND, 2*32); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
931 |
cmpq(SRND, 4 * 4*32); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
932 |
jcc(Assembler::below, loop3); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
933 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
934 |
movq(CTX, Address(rsp, _CTX)); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
935 |
movq(INP, Address(rsp, _INP)); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
936 |
addq(INP, 64); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
937 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
938 |
addm(4*0, CTX, a); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
939 |
addm(4*1, CTX, b); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
940 |
addm(4*2, CTX, c); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
941 |
addm(4*3, CTX, d); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
942 |
addm(4*4, CTX, e); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
943 |
addm(4*5, CTX, f); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
944 |
addm(4*6, CTX, g); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
945 |
addm(4*7, CTX, h); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
946 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
947 |
cmpq(INP, Address(rsp, _INP_END)); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
948 |
jcc(Assembler::below, loop0); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
949 |
jccb(Assembler::above, done_hash); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
950 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
951 |
bind(do_last_block); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
952 |
lea(TBL, ExternalAddress(K256_W)); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
953 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
954 |
movdqu(xmm4, Address(INP, 0*16)); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
955 |
movdqu(xmm5, Address(INP, 1*16)); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
956 |
movdqu(xmm6, Address(INP, 2*16)); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
957 |
movdqu(xmm7, Address(INP, 3*16)); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
958 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
959 |
vpshufb(xmm4, xmm4, xmm13, AVX_128bit); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
960 |
vpshufb(xmm5, xmm5, xmm13, AVX_128bit); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
961 |
vpshufb(xmm6, xmm6, xmm13, AVX_128bit); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
962 |
vpshufb(xmm7, xmm7, xmm13, AVX_128bit); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
963 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
964 |
jmp(last_block_enter); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
965 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
966 |
bind(only_one_block); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
967 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
968 |
// load initial digest ;; table should be preloaded with following values |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
969 |
movl(a, Address(CTX, 4*0)); // 0x6a09e667 |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
970 |
movl(b, Address(CTX, 4*1)); // 0xbb67ae85 |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
971 |
movl(c, Address(CTX, 4*2)); // 0x3c6ef372 |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
972 |
movl(d, Address(CTX, 4*3)); // 0xa54ff53a |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
973 |
movl(e, Address(CTX, 4*4)); // 0x510e527f |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
974 |
movl(f, Address(CTX, 4*5)); // 0x9b05688c |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
975 |
movl(g, Address(CTX, 4*6)); // 0x1f83d9ab |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
976 |
movl(h, Address(CTX, 4*7)); // 0x5be0cd19 |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
977 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
978 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
979 |
pshuffle_byte_flip_mask_addr = pshuffle_byte_flip_mask; |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
980 |
vmovdqu(BYTE_FLIP_MASK, ExternalAddress(pshuffle_byte_flip_mask_addr + 0)); //[PSHUFFLE_BYTE_FLIP_MASK wrt rip] |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
981 |
vmovdqu(SHUF_00BA, ExternalAddress(pshuffle_byte_flip_mask_addr + 32)); //[_SHUF_00BA wrt rip] |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
982 |
vmovdqu(SHUF_DC00, ExternalAddress(pshuffle_byte_flip_mask_addr + 64)); //[_SHUF_DC00 wrt rip] |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
983 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
984 |
movq(Address(rsp, _CTX), CTX); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
985 |
jmpb(do_last_block); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
986 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
987 |
bind(done_hash); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
988 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
989 |
movq(rsp, Address(rsp, _RSP)); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
990 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
991 |
pop(r15); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
992 |
pop(r14); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
993 |
pop(r13); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
994 |
pop(r12); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
995 |
pop(rbp); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
996 |
#ifdef _WIN64 |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
997 |
pop(rdi); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
998 |
pop(rsi); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
999 |
#endif |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
1000 |
pop(rbx); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
1001 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
1002 |
#ifdef _WIN64 |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
1003 |
pop(r9); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
1004 |
pop(r8); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
1005 |
#else |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
1006 |
pop(rdx); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
1007 |
pop(rcx); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
1008 |
#endif |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
1009 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
1010 |
if (multi_block) { |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
1011 |
#ifdef _WIN64 |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
1012 |
const Register& limit_end = r9; |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
1013 |
const Register& ofs_end = r8; |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
1014 |
#else |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
1015 |
const Register& limit_end = rcx; |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
1016 |
const Register& ofs_end = rdx; |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
1017 |
#endif |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
1018 |
movq(rax, ofs_end); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
1019 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
1020 |
bind(compute_size1); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
1021 |
cmpptr(rax, limit_end); // assume the original ofs <= limit |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
1022 |
jccb(Assembler::aboveEqual, compute_size_end1); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
1023 |
addq(rax, 64); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
1024 |
jmpb(compute_size1); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
1025 |
|
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
1026 |
bind(compute_size_end1); |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
1027 |
} |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
1028 |
} |
e06e2d071465
8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents:
36555
diff
changeset
|
1029 |
#endif //#ifdef _LP64 |