hotspot/src/cpu/x86/vm/macroAssembler_x86_sha.cpp
author duke
Wed, 05 Jul 2017 23:10:03 +0200
changeset 44509 02253db2ace1
parent 43423 bcaab17f72a5
permissions -rw-r--r--
Merge
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
36555
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
     1
/*
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
     2
* Copyright (c) 2016, Intel Corporation.
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
     3
*
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
     4
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
     5
*
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
     6
* This code is free software; you can redistribute it and/or modify it
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
     7
* under the terms of the GNU General Public License version 2 only, as
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
     8
* published by the Free Software Foundation.
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
     9
*
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
    10
* This code is distributed in the hope that it will be useful, but WITHOUT
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
    11
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
    12
* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
    13
* version 2 for more details (a copy is included in the LICENSE file that
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
    14
* accompanied this code).
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
    15
*
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
    16
* You should have received a copy of the GNU General Public License version
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
    17
* 2 along with this work; if not, write to the Free Software Foundation,
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
    18
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
    19
*
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
    20
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
    21
* or visit www.oracle.com if you need additional information or have any
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
    22
* questions.
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
    23
*
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
    24
*/
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
    25
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
    26
#include "precompiled.hpp"
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
    27
#include "asm/assembler.hpp"
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
    28
#include "asm/assembler.inline.hpp"
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
    29
#include "runtime/stubRoutines.hpp"
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
    30
#include "macroAssembler_x86.hpp"
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
    31
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
    32
// ofs and limit are used for multi-block byte array.
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
    33
// int sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
    34
// Emits a SHA-1 compression routine built on the x86 SHA instructions
// (sha1rnds4 / sha1nexte / sha1msg1 / sha1msg2).
//
//   abcd, e0, e1   working registers holding the hash state during the rounds
//   msg0..msg3     message-schedule registers, one per 16 bytes of the block
//   shuf_mask      scratch; holds the byte-flip mask for the whole block loop
//   buf            address of the 64-byte input block(s)
//   state          address of the hash state: 16 bytes at offset 0 plus one
//                  32-bit word at offset 16
//   ofs, limit     used only when multi_block is true: ofs is advanced by 64
//                  per block and the loop repeats while ofs is below-or-equal
//                  to limit
//   rsp            base address of a 32-byte scratch area in which the state
//                  is saved at the start of every block
//   multi_block    when true, keep consuming blocks and leave the final ofs
//                  in rax
void MacroAssembler::fast_sha1(XMMRegister abcd, XMMRegister e0, XMMRegister e1, XMMRegister msg0,
  XMMRegister msg1, XMMRegister msg2, XMMRegister msg3, XMMRegister shuf_mask,
  Register buf, Register state, Register ofs, Register limit, Register rsp, bool multi_block) {

  Label entry, finished, next_block;

  address e_lane_mask_addr    = StubRoutines::x86::upper_word_mask_addr();
  address byte_flip_mask_addr = StubRoutines::x86::shuffle_byte_flip_mask_addr();

  // Register tables so that the regular middle rounds can be emitted from a
  // loop: block i uses w[i & 3] as its message words and e_reg[i & 1] as the
  // E operand.
  XMMRegister w[4]     = { msg0, msg1, msg2, msg3 };
  XMMRegister e_reg[2] = { e0, e1 };

  bind(entry);

  // Load the state: four words into abcd, the fifth into the top lane of e0
  // (every other lane of e0 is cleared by the mask), then reverse the word
  // order of abcd.
  movdqu(abcd, Address(state, 0));
  pinsrd(e0, Address(state, 16), 3);
  movdqu(shuf_mask, ExternalAddress(e_lane_mask_addr));    // 0xFFFFFFFF000000000000000000000000
  pand(e0, shuf_mask);
  pshufd(abcd, abcd, 0x1B);
  movdqu(shuf_mask, ExternalAddress(byte_flip_mask_addr)); // 0x000102030405060708090a0b0c0d0e0f

  bind(next_block);

  // Keep a copy of the incoming state; it is added back after round 79.
  movdqu(Address(rsp, 0), e0);
  movdqu(Address(rsp, 16), abcd);

  // Rounds 0 - 3: first 16 message bytes, byte-swapped.
  movdqu(msg0, Address(buf, 0));
  pshufb(msg0, shuf_mask);
  paddd(e0, msg0);
  movdqa(e1, abcd);
  sha1rnds4(abcd, e0, 0);

  // Rounds 4 - 7
  movdqu(msg1, Address(buf, 16));
  pshufb(msg1, shuf_mask);
  sha1nexte(e1, msg1);
  movdqa(e0, abcd);
  sha1rnds4(abcd, e1, 0);
  sha1msg1(msg0, msg1);

  // Rounds 8 - 11
  movdqu(msg2, Address(buf, 32));
  pshufb(msg2, shuf_mask);
  sha1nexte(e0, msg2);
  movdqa(e1, abcd);
  sha1rnds4(abcd, e0, 0);
  sha1msg1(msg1, msg2);
  pxor(msg0, msg2);

  // Rounds 12 - 15: last load from the input block.
  movdqu(msg3, Address(buf, 48));
  pshufb(msg3, shuf_mask);
  sha1nexte(e1, msg3);
  movdqa(e0, abcd);
  sha1msg2(msg0, msg3);
  sha1rnds4(abcd, e1, 0);
  sha1msg1(msg2, msg3);
  pxor(msg1, msg3);

  // Rounds 16 - 67: thirteen four-round blocks that all have the same shape,
  // with the message and E registers rotating from block to block.  The
  // sha1rnds4 immediate changes every five blocks (every 20 rounds), hence
  // blk / 5.
  for (int blk = 4; blk <= 16; blk++) {
    XMMRegister cur_w  = w[blk & 3];
    XMMRegister cur_e  = e_reg[blk & 1];
    XMMRegister next_e = e_reg[(blk + 1) & 1];

    sha1nexte(cur_e, cur_w);
    movdqa(next_e, abcd);
    sha1msg2(w[(blk + 1) & 3], cur_w);
    sha1rnds4(abcd, cur_e, blk / 5);
    sha1msg1(w[(blk + 3) & 3], cur_w);
    pxor(w[(blk + 2) & 3], cur_w);
  }

  // Rounds 68 - 71: same shape as above, without the sha1msg1 step.
  sha1nexte(e1, msg1);
  movdqa(e0, abcd);
  sha1msg2(msg2, msg1);
  sha1rnds4(abcd, e1, 3);
  pxor(msg3, msg1);

  // Rounds 72 - 75
  sha1nexte(e0, msg2);
  movdqa(e1, abcd);
  sha1msg2(msg3, msg2);
  sha1rnds4(abcd, e0, 3);

  // Rounds 76 - 79
  sha1nexte(e1, msg3);
  movdqa(e0, abcd);
  sha1rnds4(abcd, e1, 3);

  // Fold the state saved at the top of the block back in (msg0 is free to be
  // used as a temporary here).
  movdqu(msg0, Address(rsp, 0));
  sha1nexte(e0, msg0);
  movdqu(msg0, Address(rsp, 16));
  paddd(abcd, msg0);

  if (multi_block) {
    // Step to the next 64-byte block and go round again while ofs is still
    // below-or-equal to limit.
    addptr(buf, 64);
    addptr(ofs, 64);
    cmpptr(ofs, limit);
    jcc(Assembler::belowEqual, next_block);
    movptr(rax, ofs); //return ofs
  }

  // Undo the word reversal and store the five state words back.
  pshufd(abcd, abcd, 0x1B);
  movdqu(Address(state, 0), abcd);
  pextrd(Address(state, 16), e0, 3);

  bind(finished);

}
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   235
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   236
// xmm0 (msg) is used as an implicit argument to sha256rnds2
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   237
// and state0 and state1 can never use xmm0 register.
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   238
// ofs and limit are used for multi-block byte array.
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   239
// int sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   240
#ifdef _LP64
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   241
void MacroAssembler::fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   242
  XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   243
  Register buf, Register state, Register ofs, Register limit, Register rsp,
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   244
  bool multi_block, XMMRegister shuf_mask) {
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   245
#else
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   246
void MacroAssembler::fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   247
  XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   248
  Register buf, Register state, Register ofs, Register limit, Register rsp,
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   249
  bool multi_block) {
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   250
#endif
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   251
  Label start, done_hash, loop0;
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   252
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   253
  address K256 = StubRoutines::x86::k256_addr();
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   254
  address pshuffle_byte_flip_mask = StubRoutines::x86::pshuffle_byte_flip_mask_addr();
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   255
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   256
  bind(start);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   257
  movdqu(state0, Address(state, 0));
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   258
  movdqu(state1, Address(state, 16));
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   259
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   260
  pshufd(state0, state0, 0xB1);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   261
  pshufd(state1, state1, 0x1B);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   262
  movdqa(msgtmp4, state0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   263
  palignr(state0, state1, 8);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   264
  pblendw(state1, msgtmp4, 0xF0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   265
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   266
#ifdef _LP64
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   267
  movdqu(shuf_mask, ExternalAddress(pshuffle_byte_flip_mask));
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   268
#endif
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   269
  lea(rax, ExternalAddress(K256));
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   270
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   271
  bind(loop0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   272
  movdqu(Address(rsp, 0), state0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   273
  movdqu(Address(rsp, 16), state1);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   274
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   275
  // Rounds 0-3
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   276
  movdqu(msg, Address(buf, 0));
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   277
#ifdef _LP64
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   278
  pshufb(msg, shuf_mask);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   279
#else
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   280
  pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask));
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   281
#endif
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   282
  movdqa(msgtmp0, msg);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   283
  paddd(msg, Address(rax, 0));
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   284
  sha256rnds2(state1, state0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   285
  pshufd(msg, msg, 0x0E);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   286
  sha256rnds2(state0, state1);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   287
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   288
  // Rounds 4-7
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   289
  movdqu(msg, Address(buf, 16));
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   290
#ifdef _LP64
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   291
  pshufb(msg, shuf_mask);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   292
#else
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   293
  pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask));
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   294
#endif
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   295
  movdqa(msgtmp1, msg);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   296
  paddd(msg, Address(rax, 16));
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   297
  sha256rnds2(state1, state0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   298
  pshufd(msg, msg, 0x0E);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   299
  sha256rnds2(state0, state1);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   300
  sha256msg1(msgtmp0, msgtmp1);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   301
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   302
  // Rounds 8-11
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   303
  movdqu(msg, Address(buf, 32));
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   304
#ifdef _LP64
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   305
  pshufb(msg, shuf_mask);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   306
#else
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   307
  pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask));
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   308
#endif
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   309
  movdqa(msgtmp2, msg);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   310
  paddd(msg, Address(rax, 32));
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   311
  sha256rnds2(state1, state0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   312
  pshufd(msg, msg, 0x0E);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   313
  sha256rnds2(state0, state1);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   314
  sha256msg1(msgtmp1, msgtmp2);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   315
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   316
  // Rounds 12-15
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   317
  movdqu(msg, Address(buf, 48));
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   318
#ifdef _LP64
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   319
  pshufb(msg, shuf_mask);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   320
#else
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   321
  pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask));
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   322
#endif
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   323
  movdqa(msgtmp3, msg);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   324
  paddd(msg, Address(rax, 48));
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   325
  sha256rnds2(state1, state0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   326
  movdqa(msgtmp4, msgtmp3);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   327
  palignr(msgtmp4, msgtmp2, 4);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   328
  paddd(msgtmp0, msgtmp4);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   329
  sha256msg2(msgtmp0, msgtmp3);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   330
  pshufd(msg, msg, 0x0E);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   331
  sha256rnds2(state0, state1);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   332
  sha256msg1(msgtmp2, msgtmp3);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   333
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   334
  // Rounds 16-19
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   335
  movdqa(msg, msgtmp0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   336
  paddd(msg, Address(rax, 64));
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   337
  sha256rnds2(state1, state0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   338
  movdqa(msgtmp4, msgtmp0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   339
  palignr(msgtmp4, msgtmp3, 4);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   340
  paddd(msgtmp1, msgtmp4);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   341
  sha256msg2(msgtmp1, msgtmp0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   342
  pshufd(msg, msg, 0x0E);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   343
  sha256rnds2(state0, state1);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   344
  sha256msg1(msgtmp3, msgtmp0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   345
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   346
  // Rounds 20-23
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   347
  movdqa(msg, msgtmp1);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   348
  paddd(msg, Address(rax, 80));
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   349
  sha256rnds2(state1, state0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   350
  movdqa(msgtmp4, msgtmp1);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   351
  palignr(msgtmp4, msgtmp0, 4);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   352
  paddd(msgtmp2, msgtmp4);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   353
  sha256msg2(msgtmp2, msgtmp1);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   354
  pshufd(msg, msg, 0x0E);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   355
  sha256rnds2(state0, state1);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   356
  sha256msg1(msgtmp0, msgtmp1);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   357
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   358
  // Rounds 24-27
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   359
  movdqa(msg, msgtmp2);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   360
  paddd(msg, Address(rax, 96));
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   361
  sha256rnds2(state1, state0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   362
  movdqa(msgtmp4, msgtmp2);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   363
  palignr(msgtmp4, msgtmp1, 4);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   364
  paddd(msgtmp3, msgtmp4);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   365
  sha256msg2(msgtmp3, msgtmp2);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   366
  pshufd(msg, msg, 0x0E);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   367
  sha256rnds2(state0, state1);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   368
  sha256msg1(msgtmp1, msgtmp2);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   369
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   370
  // Rounds 28-31
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   371
  movdqa(msg, msgtmp3);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   372
  paddd(msg, Address(rax, 112));
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   373
  sha256rnds2(state1, state0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   374
  movdqa(msgtmp4, msgtmp3);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   375
  palignr(msgtmp4, msgtmp2, 4);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   376
  paddd(msgtmp0, msgtmp4);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   377
  sha256msg2(msgtmp0, msgtmp3);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   378
  pshufd(msg, msg, 0x0E);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   379
  sha256rnds2(state0, state1);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   380
  sha256msg1(msgtmp2, msgtmp3);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   381
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   382
  // Rounds 32-35
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   383
  movdqa(msg, msgtmp0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   384
  paddd(msg, Address(rax, 128));
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   385
  sha256rnds2(state1, state0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   386
  movdqa(msgtmp4, msgtmp0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   387
  palignr(msgtmp4, msgtmp3, 4);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   388
  paddd(msgtmp1, msgtmp4);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   389
  sha256msg2(msgtmp1, msgtmp0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   390
  pshufd(msg, msg, 0x0E);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   391
  sha256rnds2(state0, state1);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   392
  sha256msg1(msgtmp3, msgtmp0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   393
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   394
  // Rounds 36-39
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   395
  movdqa(msg, msgtmp1);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   396
  paddd(msg, Address(rax, 144));
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   397
  sha256rnds2(state1, state0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   398
  movdqa(msgtmp4, msgtmp1);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   399
  palignr(msgtmp4, msgtmp0, 4);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   400
  paddd(msgtmp2, msgtmp4);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   401
  sha256msg2(msgtmp2, msgtmp1);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   402
  pshufd(msg, msg, 0x0E);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   403
  sha256rnds2(state0, state1);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   404
  sha256msg1(msgtmp0, msgtmp1);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   405
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   406
  // Rounds 40-43
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   407
  movdqa(msg, msgtmp2);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   408
  paddd(msg, Address(rax, 160));
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   409
  sha256rnds2(state1, state0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   410
  movdqa(msgtmp4, msgtmp2);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   411
  palignr(msgtmp4, msgtmp1, 4);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   412
  paddd(msgtmp3, msgtmp4);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   413
  sha256msg2(msgtmp3, msgtmp2);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   414
  pshufd(msg, msg, 0x0E);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   415
  sha256rnds2(state0, state1);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   416
  sha256msg1(msgtmp1, msgtmp2);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   417
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   418
  // Rounds 44-47
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   419
  movdqa(msg, msgtmp3);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   420
  paddd(msg, Address(rax, 176));
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   421
  sha256rnds2(state1, state0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   422
  movdqa(msgtmp4, msgtmp3);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   423
  palignr(msgtmp4, msgtmp2, 4);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   424
  paddd(msgtmp0, msgtmp4);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   425
  sha256msg2(msgtmp0, msgtmp3);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   426
  pshufd(msg, msg, 0x0E);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   427
  sha256rnds2(state0, state1);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   428
  sha256msg1(msgtmp2, msgtmp3);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   429
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   430
  // Rounds 48-51
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   431
  movdqa(msg, msgtmp0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   432
  paddd(msg, Address(rax, 192));
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   433
  sha256rnds2(state1, state0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   434
  movdqa(msgtmp4, msgtmp0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   435
  palignr(msgtmp4, msgtmp3, 4);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   436
  paddd(msgtmp1, msgtmp4);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   437
  sha256msg2(msgtmp1, msgtmp0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   438
  pshufd(msg, msg, 0x0E);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   439
  sha256rnds2(state0, state1);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   440
  sha256msg1(msgtmp3, msgtmp0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   441
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   442
  // Rounds 52-55
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   443
  movdqa(msg, msgtmp1);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   444
  paddd(msg, Address(rax, 208));
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   445
  sha256rnds2(state1, state0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   446
  movdqa(msgtmp4, msgtmp1);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   447
  palignr(msgtmp4, msgtmp0, 4);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   448
  paddd(msgtmp2, msgtmp4);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   449
  sha256msg2(msgtmp2, msgtmp1);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   450
  pshufd(msg, msg, 0x0E);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   451
  sha256rnds2(state0, state1);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   452
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   453
  // Rounds 56-59
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   454
  movdqa(msg, msgtmp2);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   455
  paddd(msg, Address(rax, 224));
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   456
  sha256rnds2(state1, state0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   457
  movdqa(msgtmp4, msgtmp2);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   458
  palignr(msgtmp4, msgtmp1, 4);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   459
  paddd(msgtmp3, msgtmp4);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   460
  sha256msg2(msgtmp3, msgtmp2);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   461
  pshufd(msg, msg, 0x0E);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   462
  sha256rnds2(state0, state1);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   463
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   464
  // Rounds 60-63
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   465
  movdqa(msg, msgtmp3);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   466
  paddd(msg, Address(rax, 240));
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   467
  sha256rnds2(state1, state0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   468
  pshufd(msg, msg, 0x0E);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   469
  sha256rnds2(state0, state1);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   470
  movdqu(msg, Address(rsp, 0));
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   471
  paddd(state0, msg);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   472
  movdqu(msg, Address(rsp, 16));
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   473
  paddd(state1, msg);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   474
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   475
  if (multi_block) {
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   476
    // increment data pointer and loop if more to process
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   477
    addptr(buf, 64);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   478
    addptr(ofs, 64);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   479
    cmpptr(ofs, limit);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   480
    jcc(Assembler::belowEqual, loop0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   481
    movptr(rax, ofs); //return ofs
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   482
  }
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   483
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   484
  pshufd(state0, state0, 0x1B);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   485
  pshufd(state1, state1, 0xB1);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   486
  movdqa(msgtmp4, state0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   487
  pblendw(state0, state1, 0xF0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   488
  palignr(state1, msgtmp4, 8);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   489
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   490
  movdqu(Address(state, 0), state0);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   491
  movdqu(Address(state, 16), state1);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   492
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   493
  bind(done_hash);
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   494
4f37fd7a5a09 8150767: Enables SHA Extensions on x86
vdeshpande
parents:
diff changeset
   495
}
38135
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   496
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   497
#ifdef _LP64
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   498
/*
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   499
  The algorithm below is based on the Intel publication:
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   500
  "Fast SHA-256 Implementations on Intel(R) Architecture Processors" by Jim Guilford, Kirk Yap and Vinodh Gopal.
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   501
  The assembly code was originally provided by Sean Gulley and in many places preserves
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   502
  the original assembly NAMES and comments to simplify matching Java assembly with its original.
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   503
  The Java version was substantially redesigned to replace 1200 assembly instructions with
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   504
  a much shorter run-time generator of the same code in memory.
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   505
*/
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   506
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   507
void MacroAssembler::sha256_AVX2_one_round_compute(
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   508
    Register  reg_old_h,
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   509
    Register  reg_a,
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   510
    Register  reg_b,
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   511
    Register  reg_c,
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   512
    Register  reg_d,
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   513
    Register  reg_e,
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   514
    Register  reg_f,
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   515
    Register  reg_g,
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   516
    Register  reg_h,
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   517
    int iter) {
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   518
  const Register& reg_y0     = r13;
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   519
  const Register& reg_y1     = r14;
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   520
  const Register& reg_y2     = r15;
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   521
  const Register& reg_y3     = rcx;
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   522
  const Register& reg_T1     = r12;
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   523
  //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND iter ;;;;;;;;;;;;;;;;;;;;;;;;;;;
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   524
  if (iter%4 > 0) {
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   525
    addl(reg_old_h, reg_y2);   // reg_h = k + w + reg_h + S0 + S1 + CH = t1 + S0; --
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   526
  }
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   527
  movl(reg_y2, reg_f);         // reg_y2 = reg_f                                ; CH
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   528
  rorxd(reg_y0, reg_e, 25);    // reg_y0 = reg_e >> 25   ; S1A
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   529
  rorxd(reg_y1, reg_e, 11);    // reg_y1 = reg_e >> 11    ; S1B
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   530
  xorl(reg_y2, reg_g);         // reg_y2 = reg_f^reg_g                              ; CH
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   531
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   532
  xorl(reg_y0, reg_y1);        // reg_y0 = (reg_e>>25) ^ (reg_h>>11)  ; S1
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   533
  rorxd(reg_y1, reg_e, 6);     // reg_y1 = (reg_e >> 6)    ; S1
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   534
  andl(reg_y2, reg_e);         // reg_y2 = (reg_f^reg_g)&reg_e                          ; CH
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   535
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   536
  if (iter%4 > 0) {
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   537
    addl(reg_old_h, reg_y3);   // reg_h = t1 + S0 + MAJ                     ; --
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   538
  }
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   539
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   540
  xorl(reg_y0, reg_y1);       // reg_y0 = (reg_e>>25) ^ (reg_e>>11) ^ (reg_e>>6) ; S1
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   541
  rorxd(reg_T1, reg_a, 13);   // reg_T1 = reg_a >> 13    ; S0B
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   542
  xorl(reg_y2, reg_g);        // reg_y2 = CH = ((reg_f^reg_g)&reg_e)^reg_g                 ; CH
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   543
  rorxd(reg_y1, reg_a, 22);   // reg_y1 = reg_a >> 22    ; S0A
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   544
  movl(reg_y3, reg_a);        // reg_y3 = reg_a                                ; MAJA
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   545
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   546
  xorl(reg_y1, reg_T1);       // reg_y1 = (reg_a>>22) ^ (reg_a>>13)  ; S0
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   547
  rorxd(reg_T1, reg_a, 2);    // reg_T1 = (reg_a >> 2)    ; S0
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   548
  addl(reg_h, Address(rsp, rdx, Address::times_1, 4*iter)); // reg_h = k + w + reg_h ; --
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   549
  orl(reg_y3, reg_c);         // reg_y3 = reg_a|reg_c                              ; MAJA
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   550
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   551
  xorl(reg_y1, reg_T1);       // reg_y1 = (reg_a>>22) ^ (reg_a>>13) ^ (reg_a>>2) ; S0
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   552
  movl(reg_T1, reg_a);        // reg_T1 = reg_a                                ; MAJB
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   553
  andl(reg_y3, reg_b);        // reg_y3 = (reg_a|reg_c)&reg_b                          ; MAJA
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   554
  andl(reg_T1, reg_c);        // reg_T1 = reg_a&reg_c                              ; MAJB
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   555
  addl(reg_y2, reg_y0);       // reg_y2 = S1 + CH                          ; --
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   556
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   557
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   558
  addl(reg_d, reg_h);         // reg_d = k + w + reg_h + reg_d                     ; --
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   559
  orl(reg_y3, reg_T1);        // reg_y3 = MAJ = (reg_a|reg_c)&reg_b)|(reg_a&reg_c)             ; MAJ
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   560
  addl(reg_h, reg_y1);        // reg_h = k + w + reg_h + S0                    ; --
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   561
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   562
  addl(reg_d, reg_y2);        // reg_d = k + w + reg_h + reg_d + S1 + CH = reg_d + t1  ; --
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   563
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   564
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   565
  if (iter%4 == 3) {
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   566
    addl(reg_h, reg_y2);      // reg_h = k + w + reg_h + S0 + S1 + CH = t1 + S0; --
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   567
    addl(reg_h, reg_y3);      // reg_h = t1 + S0 + MAJ                     ; --
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   568
  }
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   569
}
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   570
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   571
void MacroAssembler::sha256_AVX2_four_rounds_compute_first(int start) {
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   572
    sha256_AVX2_one_round_compute(rax, rax, rbx, rdi, rsi,  r8,  r9, r10, r11, start + 0);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   573
    sha256_AVX2_one_round_compute(r11, r11, rax, rbx, rdi, rsi,  r8,  r9, r10, start + 1);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   574
    sha256_AVX2_one_round_compute(r10, r10, r11, rax, rbx, rdi, rsi,  r8,  r9, start + 2);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   575
    sha256_AVX2_one_round_compute(r9,  r9,  r10, r11, rax, rbx, rdi, rsi,  r8, start + 3);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   576
}
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   577
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   578
void MacroAssembler::sha256_AVX2_four_rounds_compute_last(int start) {
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   579
    sha256_AVX2_one_round_compute(r8,  r8,   r9, r10, r11, rax, rbx, rdi, rsi, start + 0);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   580
    sha256_AVX2_one_round_compute(rsi, rsi,  r8,  r9, r10, r11, rax, rbx, rdi, start + 1);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   581
    sha256_AVX2_one_round_compute(rdi, rdi, rsi,  r8,  r9, r10, r11, rax, rbx, start + 2);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   582
    sha256_AVX2_one_round_compute(rbx, rbx, rdi, rsi,  r8,  r9, r10, r11, rax, start + 3);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   583
}
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   584
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   585
void MacroAssembler::sha256_AVX2_one_round_and_sched(
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   586
        XMMRegister  xmm_0,     /* == ymm4 on 0, 1, 2, 3 iterations, then rotate 4 registers left on 4, 8, 12 iterations */
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   587
        XMMRegister  xmm_1,     /* ymm5 */  /* full cycle is 16 iterations */
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   588
        XMMRegister  xmm_2,     /* ymm6 */
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   589
        XMMRegister  xmm_3,     /* ymm7 */
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   590
        Register  reg_a,        /* == rax on 0 iteration, then rotate 8 register right on each next iteration */
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   591
        Register  reg_b,        /* rbx */    /* full cycle is 8 iterations */
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   592
        Register  reg_c,        /* rdi */
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   593
        Register  reg_d,        /* rsi */
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   594
        Register  reg_e,        /* r8 */
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   595
        Register  reg_f,        /* r9d */
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   596
        Register  reg_g,        /* r10d */
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   597
        Register  reg_h,        /* r11d */
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   598
        int iter)
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   599
{
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   600
  movl(rcx, reg_a);           // rcx = reg_a               ; MAJA
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   601
  rorxd(r13, reg_e, 25);      // r13 = reg_e >> 25    ; S1A
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   602
  rorxd(r14, reg_e, 11);      //  r14 = reg_e >> 11    ; S1B
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   603
  addl(reg_h, Address(rsp, rdx, Address::times_1, 4*iter));
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   604
  orl(rcx, reg_c);            // rcx = reg_a|reg_c          ; MAJA
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   605
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   606
  movl(r15, reg_f);           // r15 = reg_f               ; CH
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   607
  rorxd(r12, reg_a, 13);      // r12 = reg_a >> 13      ; S0B
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   608
  xorl(r13, r14);             // r13 = (reg_e>>25) ^ (reg_e>>11)  ; S1
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   609
  xorl(r15, reg_g);           // r15 = reg_f^reg_g         ; CH
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   610
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   611
  rorxd(r14, reg_e, 6);       // r14 = (reg_e >> 6)    ; S1
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   612
  andl(r15, reg_e);           // r15 = (reg_f^reg_g)&reg_e ; CH
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   613
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   614
  xorl(r13, r14);             // r13 = (reg_e>>25) ^ (reg_e>>11) ^ (reg_e>>6) ; S1
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   615
  rorxd(r14, reg_a, 22);      // r14 = reg_a >> 22    ; S0A
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   616
  addl(reg_d, reg_h);         // reg_d = k + w + reg_h + reg_d                     ; --
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   617
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   618
  andl(rcx, reg_b);          // rcx = (reg_a|reg_c)&reg_b                          ; MAJA
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   619
  xorl(r14, r12);            // r14 = (reg_a>>22) ^ (reg_a>>13)  ; S0
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   620
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   621
  rorxd(r12, reg_a, 2);      // r12 = (reg_a >> 2)    ; S0
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   622
  xorl(r15, reg_g);          // r15 = CH = ((reg_f^reg_g)&reg_e)^reg_g                 ; CH
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   623
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   624
  xorl(r14, r12);            // r14 = (reg_a>>22) ^ (reg_a>>13) ^ (reg_a>>2) ; S0
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   625
  movl(r12, reg_a);          // r12 = reg_a                                ; MAJB
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   626
  andl(r12, reg_c);          // r12 = reg_a&reg_c                              ; MAJB
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   627
  addl(r15, r13);            // r15 = S1 + CH                          ; --
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   628
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   629
  orl(rcx, r12);             // rcx = MAJ = (reg_a|reg_c)&reg_b)|(reg_a&reg_c)             ; MAJ
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   630
  addl(reg_h, r14);          // reg_h = k + w + reg_h + S0                    ; --
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   631
  addl(reg_d, r15);          // reg_d = k + w + reg_h + reg_d + S1 + CH = reg_d + t1  ; --
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   632
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   633
  addl(reg_h, r15);          // reg_h = k + w + reg_h + S0 + S1 + CH = t1 + S0; --
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   634
  addl(reg_h, rcx);          // reg_h = t1 + S0 + MAJ                     ; --
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   635
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   636
  if (iter%4 == 0) {
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   637
    vpalignr(xmm0, xmm_3, xmm_2, 4, AVX_256bit);   // ymm0 = W[-7]
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   638
    vpaddd(xmm0, xmm0, xmm_0, AVX_256bit);         // ymm0 = W[-7] + W[-16]; y1 = (e >> 6)     ; S1
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   639
    vpalignr(xmm1, xmm_1, xmm_0, 4, AVX_256bit);   // ymm1 = W[-15]
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   640
    vpsrld(xmm2, xmm1, 7, AVX_256bit);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   641
    vpslld(xmm3, xmm1, 32-7, AVX_256bit);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   642
    vpor(xmm3, xmm3, xmm2, AVX_256bit);            // ymm3 = W[-15] ror 7
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   643
    vpsrld(xmm2, xmm1,18, AVX_256bit);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   644
  } else if (iter%4 == 1 ) {
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   645
    vpsrld(xmm8, xmm1, 3, AVX_256bit);             // ymm8 = W[-15] >> 3
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   646
    vpslld(xmm1, xmm1, 32-18, AVX_256bit);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   647
    vpxor(xmm3, xmm3, xmm1, AVX_256bit);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   648
    vpxor(xmm3, xmm3, xmm2, AVX_256bit);           // ymm3 = W[-15] ror 7 ^ W[-15] ror 18
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   649
    vpxor(xmm1, xmm3, xmm8, AVX_256bit);           // ymm1 = s0
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   650
    vpshufd(xmm2, xmm_3, 0xFA, AVX_256bit);        // 11111010b ; ymm2 = W[-2] {BBAA}
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   651
    vpaddd(xmm0, xmm0, xmm1, AVX_256bit);          // ymm0 = W[-16] + W[-7] + s0
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   652
    vpsrld(xmm8, xmm2, 10, AVX_256bit);            // ymm8 = W[-2] >> 10 {BBAA}
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   653
  } else if (iter%4 == 2) {
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   654
    vpsrlq(xmm3, xmm2, 19, AVX_256bit);            // ymm3 = W[-2] ror 19 {xBxA}
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   655
    vpsrlq(xmm2, xmm2, 17, AVX_256bit);            // ymm2 = W[-2] ror 17 {xBxA}
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   656
    vpxor(xmm2, xmm2, xmm3, AVX_256bit);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   657
    vpxor(xmm8, xmm8, xmm2, AVX_256bit);           // ymm8 = s1 {xBxA}
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   658
    vpshufb(xmm8, xmm8, xmm10, AVX_256bit);        // ymm8 = s1 {00BA}
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   659
    vpaddd(xmm0, xmm0, xmm8, AVX_256bit);          // ymm0 = {..., ..., W[1], W[0]}
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   660
    vpshufd(xmm2, xmm0, 0x50, AVX_256bit);         // 01010000b ; ymm2 = W[-2] {DDCC}
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   661
  } else if (iter%4 == 3) {
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   662
    vpsrld(xmm11, xmm2, 10, AVX_256bit);           // ymm11 = W[-2] >> 10 {DDCC}
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   663
    vpsrlq(xmm3, xmm2, 19, AVX_256bit);            // ymm3 = W[-2] ror 19 {xDxC}
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   664
    vpsrlq(xmm2, xmm2, 17, AVX_256bit);            // ymm2 = W[-2] ror 17 {xDxC}
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   665
    vpxor(xmm2, xmm2, xmm3, AVX_256bit);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   666
    vpxor(xmm11, xmm11, xmm2, AVX_256bit);         // ymm11 = s1 {xDxC}
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   667
    vpshufb(xmm11, xmm11, xmm12, AVX_256bit);      // ymm11 = s1 {DC00}
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   668
    vpaddd(xmm_0, xmm11, xmm0, AVX_256bit);        // xmm_0 = {W[3], W[2], W[1], W[0]}
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   669
  }
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   670
}
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   671
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   672
void MacroAssembler::addm(int disp, Register r1, Register r2) {
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   673
  addl(r2, Address(r1, disp));
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   674
  movl(Address(r1, disp), r2);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   675
}
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   676
42039
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
   677
void MacroAssembler::addmq(int disp, Register r1, Register r2) {
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
   678
  addq(r2, Address(r1, disp));
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
   679
  movq(Address(r1, disp), r2);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
   680
}
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
   681
38135
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   682
void MacroAssembler::sha256_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   683
  XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   684
  Register buf, Register state, Register ofs, Register limit, Register rsp,
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   685
  bool multi_block, XMMRegister shuf_mask) {
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   686
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   687
  Label loop0, loop1, loop2, loop3,
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   688
        last_block_enter, do_last_block, only_one_block, done_hash,
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   689
        compute_size, compute_size_end,
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   690
        compute_size1, compute_size_end1;
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   691
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   692
  address K256_W = StubRoutines::x86::k256_W_addr();
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   693
  address pshuffle_byte_flip_mask = StubRoutines::x86::pshuffle_byte_flip_mask_addr();
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   694
  address pshuffle_byte_flip_mask_addr = 0;
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   695
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   696
const XMMRegister& SHUF_00BA        = xmm10;    // ymm10: shuffle xBxA -> 00BA
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   697
const XMMRegister& SHUF_DC00        = xmm12;    // ymm12: shuffle xDxC -> DC00
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   698
const XMMRegister& BYTE_FLIP_MASK   = xmm13;   // ymm13
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   699
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   700
const XMMRegister& X_BYTE_FLIP_MASK = xmm13;   //XMM version of BYTE_FLIP_MASK
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   701
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   702
const Register& NUM_BLKS = r8;   // 3rd arg
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   703
const Register& CTX      = rdx;  // 2nd arg
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   704
const Register& INP      = rcx;  // 1st arg
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   705
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   706
const Register& c        = rdi;
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   707
const Register& d        = rsi;
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   708
const Register& e        = r8;    // clobbers NUM_BLKS
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   709
const Register& y3       = rcx;  // clobbers INP
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   710
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   711
const Register& TBL      = rbp;
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   712
const Register& SRND     = CTX;   // SRND is same register as CTX
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   713
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   714
const Register& a        = rax;
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   715
const Register& b        = rbx;
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   716
const Register& f        = r9;
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   717
const Register& g        = r10;
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   718
const Register& h        = r11;
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   719
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   720
const Register& T1       = r12;
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   721
const Register& y0       = r13;
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   722
const Register& y1       = r14;
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   723
const Register& y2       = r15;
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   724
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   725
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   726
enum {
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   727
  _XFER_SIZE = 2*64*4, // 2 blocks, 64 rounds, 4 bytes/round
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   728
  _INP_END_SIZE = 8,
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   729
  _INP_SIZE = 8,
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   730
  _CTX_SIZE = 8,
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   731
  _RSP_SIZE = 8,
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   732
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   733
  _XFER = 0,
41333
ce08d64b41c7 8078122: YMM registers upper 128 bits may get clobbered by a JNI call on windows
kvn
parents: 38135
diff changeset
   734
  _INP_END   = _XFER     + _XFER_SIZE,
38135
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   735
  _INP       = _INP_END  + _INP_END_SIZE,
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   736
  _CTX       = _INP      + _INP_SIZE,
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   737
  _RSP       = _CTX      + _CTX_SIZE,
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   738
  STACK_SIZE = _RSP      + _RSP_SIZE
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   739
};
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   740
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   741
#ifndef _WIN64
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   742
  push(rcx);    // linux: this is limit, need at the end
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   743
  push(rdx);    // linux: this is ofs
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   744
#else
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   745
  push(r8);     // win64: this is ofs
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   746
  push(r9);     // win64: this is limit, we need them again at the very end
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   747
#endif
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   748
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   749
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   750
  push(rbx);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   751
#ifdef _WIN64
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   752
  push(rsi);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   753
  push(rdi);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   754
#endif
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   755
  push(rbp);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   756
  push(r12);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   757
  push(r13);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   758
  push(r14);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   759
  push(r15);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   760
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   761
  movq(rax, rsp);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   762
  subq(rsp, STACK_SIZE);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   763
  andq(rsp, -32);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   764
  movq(Address(rsp, _RSP), rax);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   765
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   766
#ifndef _WIN64
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   767
  // copy linux params to win64 params, therefore the rest of code will be the same for both
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   768
  movq(r9,  rcx);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   769
  movq(r8,  rdx);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   770
  movq(rdx, rsi);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   771
  movq(rcx, rdi);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   772
#endif
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   773
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   774
  // setting original assembly ABI
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   775
  /** message to hash in INP */
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   776
  lea(INP, Address(rcx, 0));    // rcx == message (buf)     ;; linux: INP = buf = rdi
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   777
  /** digest in CTX             */
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   778
  movq(CTX, rdx);               // rdx = digest  (state)    ;; linux: CTX = state = rsi
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   779
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   780
  /** NUM_BLK is the length of message, need to set it from ofs and limit  */
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   781
  if (multi_block) {
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   782
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   783
    // Win64: cannot directly update NUM_BLKS, since NUM_BLKS = ofs = r8
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   784
    // on entry r8 = ofs
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   785
    // on exit  r8 = NUM_BLKS
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   786
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   787
    xorq(rax, rax);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   788
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   789
    bind(compute_size);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   790
    cmpptr(r8, r9); // assume the original ofs <= limit ;; linux:  cmp rcx, rdx
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   791
    jccb(Assembler::aboveEqual, compute_size_end);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   792
    addq(r8, 64);                                          //;; linux: ofs = rdx
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   793
    addq(rax, 64);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   794
    jmpb(compute_size);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   795
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   796
    bind(compute_size_end);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   797
    movq(NUM_BLKS, rax);  // NUM_BLK (r8)                  ;; linux: NUM_BLK = rdx
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   798
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   799
    cmpq(NUM_BLKS, 0);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   800
    jcc(Assembler::equal, done_hash);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   801
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   802
    } else {
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   803
    xorq(NUM_BLKS, NUM_BLKS);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   804
    addq(NUM_BLKS, 64);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   805
  }//if (!multi_block)
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   806
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   807
  lea(NUM_BLKS, Address(INP, NUM_BLKS, Address::times_1, -64)); // pointer to the last block
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   808
  movq(Address(rsp, _INP_END), NUM_BLKS);  //
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   809
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   810
  cmpptr(INP, NUM_BLKS);                   //cmp INP, NUM_BLKS
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   811
  jcc(Assembler::equal, only_one_block);   //je only_one_block
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   812
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   813
  // load initial digest
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   814
  movl(a, Address(CTX, 4*0));
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   815
  movl(b, Address(CTX, 4*1));
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   816
  movl(c, Address(CTX, 4*2));
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   817
  movl(d, Address(CTX, 4*3));
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   818
  movl(e, Address(CTX, 4*4));
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   819
  movl(f, Address(CTX, 4*5));
43423
bcaab17f72a5 8171974: Fix for R10 Register clobbering with usage of ExternalAddress
vdeshpande
parents: 42039
diff changeset
   820
  // load g - r10 after it is used as scratch
38135
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   821
  movl(h, Address(CTX, 4*7));
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   822
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   823
  pshuffle_byte_flip_mask_addr = pshuffle_byte_flip_mask;
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   824
  vmovdqu(BYTE_FLIP_MASK, ExternalAddress(pshuffle_byte_flip_mask_addr +0)); //[PSHUFFLE_BYTE_FLIP_MASK wrt rip]
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   825
  vmovdqu(SHUF_00BA, ExternalAddress(pshuffle_byte_flip_mask_addr + 32));     //[_SHUF_00BA wrt rip]
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   826
  vmovdqu(SHUF_DC00, ExternalAddress(pshuffle_byte_flip_mask_addr + 64));     //[_SHUF_DC00 wrt rip]
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   827
43423
bcaab17f72a5 8171974: Fix for R10 Register clobbering with usage of ExternalAddress
vdeshpande
parents: 42039
diff changeset
   828
  movl(g, Address(CTX, 4*6));
bcaab17f72a5 8171974: Fix for R10 Register clobbering with usage of ExternalAddress
vdeshpande
parents: 42039
diff changeset
   829
38135
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   830
  movq(Address(rsp, _CTX), CTX);           // store
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   831
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   832
bind(loop0);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   833
  lea(TBL, ExternalAddress(K256_W));
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   834
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   835
  // assume buffers not aligned
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   836
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   837
  // Load first 16 dwords from two blocks
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   838
  vmovdqu(xmm0, Address(INP, 0*32));
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   839
  vmovdqu(xmm1, Address(INP, 1*32));
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   840
  vmovdqu(xmm2, Address(INP, 2*32));
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   841
  vmovdqu(xmm3, Address(INP, 3*32));
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   842
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   843
  // byte swap data
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   844
  vpshufb(xmm0, xmm0, BYTE_FLIP_MASK, AVX_256bit);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   845
  vpshufb(xmm1, xmm1, BYTE_FLIP_MASK, AVX_256bit);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   846
  vpshufb(xmm2, xmm2, BYTE_FLIP_MASK, AVX_256bit);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   847
  vpshufb(xmm3, xmm3, BYTE_FLIP_MASK, AVX_256bit);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   848
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   849
  // transpose data into high/low halves
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   850
  vperm2i128(xmm4, xmm0, xmm2, 0x20);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   851
  vperm2i128(xmm5, xmm0, xmm2, 0x31);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   852
  vperm2i128(xmm6, xmm1, xmm3, 0x20);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   853
  vperm2i128(xmm7, xmm1, xmm3, 0x31);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   854
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   855
bind(last_block_enter);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   856
  addq(INP, 64);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   857
  movq(Address(rsp, _INP), INP);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   858
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   859
  //;; schedule 48 input dwords, by doing 3 iterations of 16 rounds each
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   860
  xorq(SRND, SRND);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   861
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   862
align(16);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   863
bind(loop1);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   864
  vpaddd(xmm9, xmm4, Address(TBL, SRND, Address::times_1, 0*32), AVX_256bit);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   865
  vmovdqu(Address(rsp, SRND, Address::times_1, _XFER + 0*32), xmm9);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   866
  sha256_AVX2_one_round_and_sched(xmm4, xmm5, xmm6, xmm7, rax, rbx, rdi, rsi, r8,  r9,  r10, r11, 0);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   867
  sha256_AVX2_one_round_and_sched(xmm4, xmm5, xmm6, xmm7, r11, rax, rbx, rdi, rsi, r8,  r9,  r10, 1);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   868
  sha256_AVX2_one_round_and_sched(xmm4, xmm5, xmm6, xmm7, r10, r11, rax, rbx, rdi, rsi, r8,  r9,  2);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   869
  sha256_AVX2_one_round_and_sched(xmm4, xmm5, xmm6, xmm7, r9,  r10, r11, rax, rbx, rdi, rsi, r8,  3);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   870
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   871
  vpaddd(xmm9, xmm5, Address(TBL, SRND, Address::times_1, 1*32), AVX_256bit);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   872
  vmovdqu(Address(rsp, SRND, Address::times_1, _XFER + 1*32), xmm9);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   873
  sha256_AVX2_one_round_and_sched(xmm5, xmm6, xmm7, xmm4, r8,  r9,  r10, r11, rax, rbx, rdi, rsi,  8+0);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   874
  sha256_AVX2_one_round_and_sched(xmm5, xmm6, xmm7, xmm4, rsi, r8,  r9,  r10, r11, rax, rbx, rdi,  8+1);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   875
  sha256_AVX2_one_round_and_sched(xmm5, xmm6, xmm7, xmm4, rdi, rsi, r8,  r9,  r10, r11, rax, rbx,  8+2);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   876
  sha256_AVX2_one_round_and_sched(xmm5, xmm6, xmm7, xmm4, rbx, rdi, rsi, r8,  r9,  r10, r11, rax,  8+3);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   877
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   878
  vpaddd(xmm9, xmm6, Address(TBL, SRND, Address::times_1, 2*32), AVX_256bit);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   879
  vmovdqu(Address(rsp, SRND, Address::times_1, _XFER + 2*32), xmm9);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   880
  sha256_AVX2_one_round_and_sched(xmm6, xmm7, xmm4, xmm5, rax, rbx, rdi, rsi, r8,  r9,  r10, r11, 16+0);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   881
  sha256_AVX2_one_round_and_sched(xmm6, xmm7, xmm4, xmm5, r11, rax, rbx, rdi, rsi, r8,  r9,  r10, 16+1);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   882
  sha256_AVX2_one_round_and_sched(xmm6, xmm7, xmm4, xmm5, r10, r11, rax, rbx, rdi, rsi, r8,  r9,  16+2);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   883
  sha256_AVX2_one_round_and_sched(xmm6, xmm7, xmm4, xmm5, r9,  r10, r11, rax, rbx, rdi, rsi, r8,  16+3);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   884
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   885
  vpaddd(xmm9, xmm7, Address(TBL, SRND, Address::times_1, 3*32), AVX_256bit);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   886
  vmovdqu(Address(rsp, SRND, Address::times_1, _XFER + 3*32), xmm9);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   887
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   888
  sha256_AVX2_one_round_and_sched(xmm7, xmm4, xmm5, xmm6, r8,  r9,  r10, r11, rax, rbx, rdi, rsi,  24+0);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   889
  sha256_AVX2_one_round_and_sched(xmm7, xmm4, xmm5, xmm6, rsi, r8,  r9,  r10, r11, rax, rbx, rdi,  24+1);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   890
  sha256_AVX2_one_round_and_sched(xmm7, xmm4, xmm5, xmm6, rdi, rsi, r8,  r9,  r10, r11, rax, rbx,  24+2);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   891
  sha256_AVX2_one_round_and_sched(xmm7, xmm4, xmm5, xmm6, rbx, rdi, rsi, r8,  r9,  r10, r11, rax,  24+3);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   892
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   893
  addq(SRND, 4*32);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   894
  cmpq(SRND, 3 * 4*32);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   895
  jcc(Assembler::below, loop1);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   896
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   897
bind(loop2);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   898
  // Do last 16 rounds with no scheduling
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   899
  vpaddd(xmm9, xmm4, Address(TBL, SRND, Address::times_1, 0*32), AVX_256bit);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   900
  vmovdqu(Address(rsp, SRND, Address::times_1, _XFER + 0*32), xmm9);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   901
  sha256_AVX2_four_rounds_compute_first(0);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   902
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   903
  vpaddd(xmm9, xmm5, Address(TBL, SRND, Address::times_1, 1*32), AVX_256bit);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   904
  vmovdqu(Address(rsp, SRND, Address::times_1, _XFER + 1*32), xmm9);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   905
  sha256_AVX2_four_rounds_compute_last(0 + 8);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   906
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   907
  addq(SRND, 2*32);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   908
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   909
  vmovdqu(xmm4, xmm6);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   910
  vmovdqu(xmm5, xmm7);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   911
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   912
  cmpq(SRND, 4 * 4*32);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   913
  jcc(Assembler::below, loop2);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   914
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   915
  movq(CTX, Address(rsp, _CTX));
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   916
  movq(INP, Address(rsp, _INP));
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   917
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   918
  addm(4*0, CTX, a);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   919
  addm(4*1, CTX, b);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   920
  addm(4*2, CTX, c);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   921
  addm(4*3, CTX, d);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   922
  addm(4*4, CTX, e);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   923
  addm(4*5, CTX, f);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   924
  addm(4*6, CTX, g);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   925
  addm(4*7, CTX, h);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   926
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   927
  cmpq(INP, Address(rsp, _INP_END));
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   928
  jcc(Assembler::above, done_hash);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   929
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   930
  //Do second block using previously scheduled results
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   931
  xorq(SRND, SRND);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   932
align(16);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   933
bind(loop3);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   934
  sha256_AVX2_four_rounds_compute_first(4);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   935
  sha256_AVX2_four_rounds_compute_last(4+8);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   936
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   937
  addq(SRND, 2*32);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   938
  cmpq(SRND, 4 * 4*32);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   939
  jcc(Assembler::below, loop3);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   940
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   941
  movq(CTX, Address(rsp, _CTX));
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   942
  movq(INP, Address(rsp, _INP));
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   943
  addq(INP, 64);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   944
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   945
  addm(4*0, CTX, a);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   946
  addm(4*1, CTX, b);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   947
  addm(4*2, CTX, c);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   948
  addm(4*3, CTX, d);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   949
  addm(4*4, CTX, e);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   950
  addm(4*5, CTX, f);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   951
  addm(4*6, CTX, g);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   952
  addm(4*7, CTX, h);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   953
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   954
  cmpq(INP, Address(rsp, _INP_END));
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   955
  jcc(Assembler::below, loop0);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   956
  jccb(Assembler::above, done_hash);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   957
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   958
bind(do_last_block);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   959
  lea(TBL, ExternalAddress(K256_W));
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   960
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   961
  movdqu(xmm4, Address(INP, 0*16));
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   962
  movdqu(xmm5, Address(INP, 1*16));
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   963
  movdqu(xmm6, Address(INP, 2*16));
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   964
  movdqu(xmm7, Address(INP, 3*16));
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   965
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   966
  vpshufb(xmm4, xmm4, xmm13, AVX_128bit);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   967
  vpshufb(xmm5, xmm5, xmm13, AVX_128bit);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   968
  vpshufb(xmm6, xmm6, xmm13, AVX_128bit);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   969
  vpshufb(xmm7, xmm7, xmm13, AVX_128bit);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   970
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   971
  jmp(last_block_enter);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   972
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   973
bind(only_one_block);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   974
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   975
  // load initial digest ;; table should be preloaded with following values
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   976
  movl(a, Address(CTX, 4*0));   // 0x6a09e667
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   977
  movl(b, Address(CTX, 4*1));   // 0xbb67ae85
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   978
  movl(c, Address(CTX, 4*2));   // 0x3c6ef372
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   979
  movl(d, Address(CTX, 4*3));   // 0xa54ff53a
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   980
  movl(e, Address(CTX, 4*4));   // 0x510e527f
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   981
  movl(f, Address(CTX, 4*5));   // 0x9b05688c
43423
bcaab17f72a5 8171974: Fix for R10 Register clobbering with usage of ExternalAddress
vdeshpande
parents: 42039
diff changeset
   982
  // load g - r10 after use as scratch
38135
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   983
  movl(h, Address(CTX, 4*7));   // 0x5be0cd19
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   984
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   985
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   986
  pshuffle_byte_flip_mask_addr = pshuffle_byte_flip_mask;
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   987
  vmovdqu(BYTE_FLIP_MASK, ExternalAddress(pshuffle_byte_flip_mask_addr + 0)); //[PSHUFFLE_BYTE_FLIP_MASK wrt rip]
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   988
  vmovdqu(SHUF_00BA, ExternalAddress(pshuffle_byte_flip_mask_addr + 32));     //[_SHUF_00BA wrt rip]
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   989
  vmovdqu(SHUF_DC00, ExternalAddress(pshuffle_byte_flip_mask_addr + 64));     //[_SHUF_DC00 wrt rip]
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   990
43423
bcaab17f72a5 8171974: Fix for R10 Register clobbering with usage of ExternalAddress
vdeshpande
parents: 42039
diff changeset
   991
  movl(g, Address(CTX, 4*6));   // 0x1f83d9ab
bcaab17f72a5 8171974: Fix for R10 Register clobbering with usage of ExternalAddress
vdeshpande
parents: 42039
diff changeset
   992
38135
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   993
  movq(Address(rsp, _CTX), CTX);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   994
  jmpb(do_last_block);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   995
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   996
bind(done_hash);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   997
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   998
  movq(rsp, Address(rsp, _RSP));
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
   999
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1000
  pop(r15);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1001
  pop(r14);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1002
  pop(r13);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1003
  pop(r12);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1004
  pop(rbp);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1005
#ifdef _WIN64
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1006
  pop(rdi);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1007
  pop(rsi);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1008
#endif
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1009
  pop(rbx);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1010
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1011
#ifdef _WIN64
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1012
  pop(r9);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1013
  pop(r8);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1014
#else
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1015
  pop(rdx);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1016
  pop(rcx);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1017
#endif
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1018
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1019
  if (multi_block) {
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1020
#ifdef _WIN64
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1021
const Register& limit_end = r9;
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1022
const Register& ofs_end   = r8;
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1023
#else
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1024
const Register& limit_end = rcx;
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1025
const Register& ofs_end   = rdx;
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1026
#endif
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1027
    movq(rax, ofs_end);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1028
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1029
bind(compute_size1);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1030
    cmpptr(rax, limit_end); // assume the original ofs <= limit
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1031
    jccb(Assembler::aboveEqual, compute_size_end1);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1032
    addq(rax, 64);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1033
    jmpb(compute_size1);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1034
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1035
bind(compute_size_end1);
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1036
  }
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1037
}
42039
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1038
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1039
void MacroAssembler::sha512_AVX2_one_round_compute(Register  old_h, Register a, Register b, Register c,
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1040
                                                   Register d, Register e, Register f, Register g, Register h,
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1041
                                                   int iteration)
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1042
{
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1043
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1044
    const Register& y0 = r13;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1045
    const Register& y1 = r14;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1046
    const Register& y2 = r15;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1047
#ifdef _WIN64
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1048
    const Register& y3 = rcx;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1049
#else
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1050
    const Register& y3 = rdi;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1051
#endif
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1052
    const Register& T1 = r12;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1053
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1054
    if (iteration % 4 > 0) {
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1055
      addq(old_h, y2); //h = k + w + h + S0 + S1 + CH = t1 + S0;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1056
    }
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1057
    movq(y2, f); //y2 = f; CH
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1058
    rorxq(y0, e, 41); //y0 = e >> 41; S1A
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1059
    rorxq(y1, e, 18); //y1 = e >> 18; S1B
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1060
    xorq(y2, g); //y2 = f^g; CH
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1061
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1062
    xorq(y0, y1); //y0 = (e >> 41) ^ (e >> 18); S1
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1063
    rorxq(y1, e, 14); //y1 = (e >> 14); S1
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1064
    andq(y2, e); //y2 = (f^g)&e; CH
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1065
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1066
    if (iteration % 4 > 0 ) {
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1067
      addq(old_h, y3); //h = t1 + S0 + MAJ
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1068
    }
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1069
    xorq(y0, y1); //y0 = (e >> 41) ^ (e >> 18) ^ (e >> 14); S1
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1070
    rorxq(T1, a, 34); //T1 = a >> 34; S0B
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1071
    xorq(y2, g); //y2 = CH = ((f^g)&e) ^g; CH
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1072
    rorxq(y1, a, 39); //y1 = a >> 39; S0A
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1073
    movq(y3, a); //y3 = a; MAJA
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1074
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1075
    xorq(y1, T1); //y1 = (a >> 39) ^ (a >> 34); S0
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1076
    rorxq(T1, a, 28); //T1 = (a >> 28); S0
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1077
    addq(h, Address(rsp, (8 * iteration))); //h = k + w + h; --
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1078
    orq(y3, c); //y3 = a | c; MAJA
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1079
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1080
    xorq(y1, T1); //y1 = (a >> 39) ^ (a >> 34) ^ (a >> 28); S0
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1081
    movq(T1, a); //T1 = a; MAJB
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1082
    andq(y3, b); //y3 = (a | c)&b; MAJA
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1083
    andq(T1, c); //T1 = a&c; MAJB
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1084
    addq(y2, y0); //y2 = S1 + CH; --
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1085
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1086
    addq(d, h); //d = k + w + h + d; --
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1087
    orq(y3, T1); //y3 = MAJ = (a | c)&b) | (a&c); MAJ
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1088
    addq(h, y1); //h = k + w + h + S0; --
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1089
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1090
    addq(d, y2); //d = k + w + h + d + S1 + CH = d + t1; --
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1091
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1092
    if (iteration % 4 == 3) {
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1093
      addq(h, y2); //h = k + w + h + S0 + S1 + CH = t1 + S0; --
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1094
      addq(h, y3); //h = t1 + S0 + MAJ; --
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1095
    }
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1096
}
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1097
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1098
void MacroAssembler::sha512_AVX2_one_round_and_schedule(
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1099
    XMMRegister xmm4, // ymm4
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1100
    XMMRegister xmm5, // ymm5
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1101
    XMMRegister xmm6, // ymm6
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1102
    XMMRegister xmm7, // ymm7
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1103
    Register a, //rax
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1104
    Register b, //rbx
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1105
    Register c, //rdi
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1106
    Register d, //rsi
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1107
    Register e, //r8
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1108
    Register f, //r9
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1109
    Register g, //r10
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1110
    Register h, //r11
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1111
    int iteration)
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1112
{
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1113
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1114
    const Register& y0 = r13;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1115
    const Register& y1 = r14;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1116
    const Register& y2 = r15;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1117
#ifdef _WIN64
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1118
    const Register& y3 = rcx;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1119
#else
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1120
    const Register& y3 = rdi;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1121
#endif
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1122
    const Register& T1 = r12;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1123
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1124
    if (iteration % 4 == 0) {
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1125
      // Extract w[t - 7]
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1126
      // xmm0 = W[-7]
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1127
      vperm2f128(xmm0, xmm7, xmm6, 3);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1128
      vpalignr(xmm0, xmm0, xmm6, 8, AVX_256bit);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1129
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1130
      // Calculate w[t - 16] + w[t - 7]
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1131
      vpaddq(xmm0, xmm0, xmm4, AVX_256bit); //xmm0 = W[-7] + W[-16]
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1132
      // Extract w[t - 15]
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1133
      //xmm1 = W[-15]
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1134
      vperm2f128(xmm1, xmm5, xmm4, 3);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1135
      vpalignr(xmm1, xmm1, xmm4, 8, AVX_256bit);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1136
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1137
      // Calculate sigma0
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1138
      // Calculate w[t - 15] ror 1
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1139
      vpsrlq(xmm2, xmm1, 1, AVX_256bit);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1140
      vpsllq(xmm3, xmm1, (64 - 1), AVX_256bit);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1141
      vpor(xmm3, xmm3, xmm2, AVX_256bit); //xmm3 = W[-15] ror 1
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1142
      // Calculate w[t - 15] shr 7
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1143
      vpsrlq(xmm8, xmm1, 7, AVX_256bit); //xmm8 = W[-15] >> 7
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1144
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1145
    } else if (iteration % 4 == 1) {
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1146
      //Calculate w[t - 15] ror 8
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1147
      vpsrlq(xmm2, xmm1, 8, AVX_256bit);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1148
      vpsllq(xmm1, xmm1, (64 - 8), AVX_256bit);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1149
      vpor(xmm1, xmm1, xmm2, AVX_256bit); //xmm1 = W[-15] ror 8
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1150
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1151
      //XOR the three components
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1152
      vpxor(xmm3, xmm3, xmm8, AVX_256bit); //xmm3 = W[-15] ror 1 ^ W[-15] >> 7
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1153
      vpxor(xmm1, xmm3, xmm1, AVX_256bit); //xmm1 = s0
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1154
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1155
      //Add three components, w[t - 16], w[t - 7] and sigma0
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1156
      vpaddq(xmm0, xmm0, xmm1, AVX_256bit); //xmm0 = W[-16] + W[-7] + s0
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1157
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1158
      // Move to appropriate lanes for calculating w[16] and w[17]
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1159
      vperm2f128(xmm4, xmm0, xmm0, 0); //xmm4 = W[-16] + W[-7] + s0{ BABA }
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1160
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1161
      //Move to appropriate lanes for calculating w[18] and w[19]
43423
bcaab17f72a5 8171974: Fix for R10 Register clobbering with usage of ExternalAddress
vdeshpande
parents: 42039
diff changeset
  1162
      vpand(xmm0, xmm0, xmm10, AVX_256bit); //xmm0 = W[-16] + W[-7] + s0{ DC00 }
42039
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1163
      //Calculate w[16] and w[17] in both 128 bit lanes
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1164
      //Calculate sigma1 for w[16] and w[17] on both 128 bit lanes
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1165
      vperm2f128(xmm2, xmm7, xmm7, 17); //xmm2 = W[-2] {BABA}
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1166
      vpsrlq(xmm8, xmm2, 6, AVX_256bit); //xmm8 = W[-2] >> 6 {BABA}
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1167
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1168
    } else if (iteration % 4 == 2) {
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1169
      vpsrlq(xmm3, xmm2, 19, AVX_256bit); //xmm3 = W[-2] >> 19 {BABA}
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1170
      vpsllq(xmm1, xmm2, (64 - 19), AVX_256bit); //xmm1 = W[-2] << 19 {BABA}
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1171
      vpor(xmm3, xmm3, xmm1, AVX_256bit); //xmm3 = W[-2] ror 19 {BABA}
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1172
      vpxor(xmm8, xmm8, xmm3, AVX_256bit);// xmm8 = W[-2] ror 19 ^ W[-2] >> 6 {BABA}
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1173
      vpsrlq(xmm3, xmm2, 61, AVX_256bit); //xmm3 = W[-2] >> 61 {BABA}
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1174
      vpsllq(xmm1, xmm2, (64 - 61), AVX_256bit); //xmm1 = W[-2] << 61 {BABA}
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1175
      vpor(xmm3, xmm3, xmm1, AVX_256bit); //xmm3 = W[-2] ror 61 {BABA}
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1176
      vpxor(xmm8, xmm8, xmm3, AVX_256bit); //xmm8 = s1 = (W[-2] ror 19) ^ (W[-2] ror 61) ^ (W[-2] >> 6) { BABA }
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1177
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1178
      //Add sigma1 to the other components to get w[16] and w[17]
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1179
      vpaddq(xmm4, xmm4, xmm8, AVX_256bit); //xmm4 = { W[1], W[0], W[1], W[0] }
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1180
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1181
      //Calculate sigma1 for w[18] and w[19] for upper 128 bit lane
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1182
      vpsrlq(xmm8, xmm4, 6, AVX_256bit); //xmm8 = W[-2] >> 6 {DC--}
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1183
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1184
    } else if (iteration % 4 == 3){
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1185
      vpsrlq(xmm3, xmm4, 19, AVX_256bit); //xmm3 = W[-2] >> 19 {DC--}
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1186
      vpsllq(xmm1, xmm4, (64 - 19), AVX_256bit); //xmm1 = W[-2] << 19 {DC--}
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1187
      vpor(xmm3, xmm3, xmm1, AVX_256bit); //xmm3 = W[-2] ror 19 {DC--}
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1188
      vpxor(xmm8, xmm8, xmm3, AVX_256bit); //xmm8 = W[-2] ror 19 ^ W[-2] >> 6 {DC--}
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1189
      vpsrlq(xmm3, xmm4, 61, AVX_256bit); //xmm3 = W[-2] >> 61 {DC--}
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1190
      vpsllq(xmm1, xmm4, (64 - 61), AVX_256bit); //xmm1 = W[-2] << 61 {DC--}
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1191
      vpor(xmm3, xmm3, xmm1, AVX_256bit); //xmm3 = W[-2] ror 61 {DC--}
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1192
      vpxor(xmm8, xmm8, xmm3, AVX_256bit); //xmm8 = s1 = (W[-2] ror 19) ^ (W[-2] ror 61) ^ (W[-2] >> 6) { DC-- }
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1193
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1194
      //Add the sigma0 + w[t - 7] + w[t - 16] for w[18] and w[19] to newly calculated sigma1 to get w[18] and w[19]
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1195
      vpaddq(xmm2, xmm0, xmm8, AVX_256bit); //xmm2 = { W[3], W[2], --, -- }
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1196
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1197
      //Form w[19], w[18], w[17], w[16]
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1198
      vpblendd(xmm4, xmm4, xmm2, 0xF0, AVX_256bit); //xmm4 = { W[3], W[2], W[1], W[0] }
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1199
    }
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1200
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1201
    movq(y3, a); //y3 = a; MAJA
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1202
    rorxq(y0, e, 41); // y0 = e >> 41; S1A
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1203
    rorxq(y1, e, 18); //y1 = e >> 18; S1B
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1204
    addq(h, Address(rsp, (iteration * 8))); //h = k + w + h; --
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1205
    orq(y3, c); //y3 = a | c; MAJA
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1206
    movq(y2, f); //y2 = f; CH
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1207
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1208
    xorq(y2, g); //y2 = f^g; CH
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1209
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1210
    rorxq(T1, a, 34); //T1 = a >> 34; S0B
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1211
    xorq(y0, y1); //y0 = (e >> 41) ^ (e >> 18); S1
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1212
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1213
    rorxq(y1, e, 14); //y1 = (e >> 14); S1
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1214
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1215
    andq(y2, e); //y2 = (f^g) & e; CH
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1216
    addq(d, h); //d = k + w + h + d; --
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1217
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1218
    andq(y3, b); //y3 = (a | c)&b; MAJA
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1219
    xorq(y0, y1); //y0 = (e >> 41) ^ (e >> 18) ^ (e >> 14); S1
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1220
    rorxq(y1, a, 39); //y1 = a >> 39; S0A
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1221
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1222
    xorq(y1, T1); //y1 = (a >> 39) ^ (a >> 34); S0
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1223
    rorxq(T1, a, 28); //T1 = (a >> 28); S0
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1224
    xorq(y2, g); //y2 = CH = ((f^g)&e) ^ g; CH
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1225
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1226
    xorq(y1, T1); //y1 = (a >> 39) ^ (a >> 34) ^ (a >> 28); S0
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1227
    movq(T1, a); //T1 = a; MAJB
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1228
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1229
    andq(T1, c); //T1 = a&c; MAJB
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1230
    addq(y2, y0); //y2 = S1 + CH; --
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1231
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1232
    orq(y3, T1); //y3 = MAJ = (a | c)&b) | (a&c); MAJ
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1233
    addq(h, y1); //h = k + w + h + S0; --
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1234
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1235
    addq(d, y2); //d = k + w + h + d + S1 + CH = d + t1; --
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1236
    addq(h, y2); //h = k + w + h + S0 + S1 + CH = t1 + S0; --
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1237
    addq(h, y3); //h = t1 + S0 + MAJ; --
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1238
}
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1239
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1240
void MacroAssembler::sha512_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1241
                                 XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1242
                                 Register buf, Register state, Register ofs, Register limit, Register rsp,
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1243
                                 bool multi_block, XMMRegister shuf_mask)
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1244
{
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1245
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1246
    Label loop0, loop1, loop2, done_hash,
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1247
    compute_block_size, compute_size,
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1248
    compute_block_size_end, compute_size_end;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1249
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1250
    address K512_W = StubRoutines::x86::k512_W_addr();
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1251
    address pshuffle_byte_flip_mask_sha512 = StubRoutines::x86::pshuffle_byte_flip_mask_addr_sha512();
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1252
    address pshuffle_byte_flip_mask_addr = 0;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1253
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1254
    const XMMRegister& XFER = xmm0; // YTMP0
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1255
    const XMMRegister& BYTE_FLIP_MASK = xmm9; // ymm9
43423
bcaab17f72a5 8171974: Fix for R10 Register clobbering with usage of ExternalAddress
vdeshpande
parents: 42039
diff changeset
  1256
    const XMMRegister& YMM_MASK_LO = xmm10; // ymm10
42039
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1257
#ifdef _WIN64
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1258
    const Register& INP = rcx; //1st arg
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1259
    const Register& CTX = rdx; //2nd arg
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1260
    const Register& NUM_BLKS = r8; //3rd arg
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1261
    const Register& c = rdi;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1262
    const Register& d = rsi;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1263
    const Register& e = r8;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1264
    const Register& y3 = rcx;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1265
    const Register& offset = r8;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1266
    const Register& input_limit = r9;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1267
#else
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1268
    const Register& INP = rdi; //1st arg
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1269
    const Register& CTX = rsi; //2nd arg
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1270
    const Register& NUM_BLKS = rdx; //3rd arg
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1271
    const Register& c  = rcx;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1272
    const Register& d  = r8;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1273
    const Register& e  = rdx;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1274
    const Register& y3 = rdi;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1275
    const Register& offset = rdx;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1276
    const Register& input_limit = rcx;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1277
#endif
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1278
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1279
    const Register& TBL = rbp;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1280
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1281
    const Register& a = rax;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1282
    const Register& b = rbx;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1283
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1284
    const Register& f = r9;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1285
    const Register& g = r10;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1286
    const Register& h = r11;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1287
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1288
    //Local variables as defined in assembly file.
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1289
    enum
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1290
    {
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1291
      _XFER_SIZE = 4 * 8, // resq 4 => reserve 4 quadwords. Hence 4 * 8
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1292
      _SRND_SIZE = 8, // resq 1
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1293
      _INP_SIZE = 8,
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1294
      _INP_END_SIZE = 8,
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1295
      _RSP_SAVE_SIZE = 8,  // defined as resq 1
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1296
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1297
#ifdef _WIN64
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1298
      _GPR_SAVE_SIZE = 8 * 8, // defined as resq 8
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1299
#else
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1300
      _GPR_SAVE_SIZE = 6 * 8 // resq 6
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1301
#endif
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1302
    };
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1303
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1304
    enum
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1305
    {
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1306
      _XFER = 0,
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1307
      _SRND = _XFER + _XFER_SIZE, // 32
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1308
      _INP = _SRND + _SRND_SIZE, // 40
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1309
      _INP_END = _INP + _INP_SIZE, // 48
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1310
      _RSP = _INP_END + _INP_END_SIZE, // 56
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1311
      _GPR = _RSP + _RSP_SAVE_SIZE, // 64
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1312
      _STACK_SIZE = _GPR + _GPR_SAVE_SIZE // 128 for windows and 112 for linux.
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1313
    };
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1314
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1315
//Saving offset and limit as it will help with blocksize calculation for multiblock SHA512.
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1316
#ifdef _WIN64
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1317
    push(r8);    // win64: this is ofs
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1318
    push(r9);    // win64: this is limit, we need them again at the very end.
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1319
#else
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1320
    push(rdx);   // linux : this is ofs, need at the end for multiblock calculation
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1321
    push(rcx);   // linux: This is the limit.
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1322
#endif
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1323
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1324
    //Allocate Stack Space
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1325
    movq(rax, rsp);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1326
    subq(rsp, _STACK_SIZE);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1327
    andq(rsp, -32);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1328
    movq(Address(rsp, _RSP), rax);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1329
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1330
    //Save GPRs
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1331
    movq(Address(rsp, _GPR), rbp);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1332
    movq(Address(rsp, (_GPR + 8)), rbx);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1333
    movq(Address(rsp, (_GPR + 16)), r12);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1334
    movq(Address(rsp, (_GPR + 24)), r13);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1335
    movq(Address(rsp, (_GPR + 32)), r14);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1336
    movq(Address(rsp, (_GPR + 40)), r15);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1337
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1338
#ifdef _WIN64
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1339
    movq(Address(rsp, (_GPR + 48)), rsi);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1340
    movq(Address(rsp, (_GPR + 56)), rdi);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1341
#endif
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1342
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1343
    vpblendd(xmm0, xmm0, xmm1, 0xF0, AVX_128bit);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1344
    vpblendd(xmm0, xmm0, xmm1, 0xF0, AVX_256bit);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1345
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1346
    if (multi_block) {
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1347
      xorq(rax, rax);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1348
      bind(compute_block_size);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1349
      cmpptr(offset, input_limit); // Assuming that offset is less than limit.
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1350
      jccb(Assembler::aboveEqual, compute_block_size_end);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1351
      addq(offset, 128);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1352
      addq(rax, 128);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1353
      jmpb(compute_block_size);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1354
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1355
      bind(compute_block_size_end);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1356
      movq(NUM_BLKS, rax);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1357
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1358
      cmpq(NUM_BLKS, 0);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1359
      jcc(Assembler::equal, done_hash);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1360
    } else {
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1361
      xorq(NUM_BLKS, NUM_BLKS); //If single block.
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1362
      addq(NUM_BLKS, 128);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1363
    }
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1364
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1365
    addq(NUM_BLKS, INP); //pointer to end of data
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1366
    movq(Address(rsp, _INP_END), NUM_BLKS);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1367
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1368
    //load initial digest
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1369
    movq(a, Address(CTX, 8 * 0));
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1370
    movq(b, Address(CTX, 8 * 1));
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1371
    movq(c, Address(CTX, 8 * 2));
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1372
    movq(d, Address(CTX, 8 * 3));
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1373
    movq(e, Address(CTX, 8 * 4));
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1374
    movq(f, Address(CTX, 8 * 5));
43423
bcaab17f72a5 8171974: Fix for R10 Register clobbering with usage of ExternalAddress
vdeshpande
parents: 42039
diff changeset
  1375
    // g lives in r10, which the ExternalAddress loads below may use as scratch, so g is loaded after them
42039
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1376
    movq(h, Address(CTX, 8 * 7));
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1377
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1378
    pshuffle_byte_flip_mask_addr = pshuffle_byte_flip_mask_sha512;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1379
    vmovdqu(BYTE_FLIP_MASK, ExternalAddress(pshuffle_byte_flip_mask_addr + 0)); //PSHUFFLE_BYTE_FLIP_MASK wrt rip
43423
bcaab17f72a5 8171974: Fix for R10 Register clobbering with usage of ExternalAddress
vdeshpande
parents: 42039
diff changeset
  1380
    vmovdqu(YMM_MASK_LO, ExternalAddress(pshuffle_byte_flip_mask_addr + 32));
bcaab17f72a5 8171974: Fix for R10 Register clobbering with usage of ExternalAddress
vdeshpande
parents: 42039
diff changeset
  1381
bcaab17f72a5 8171974: Fix for R10 Register clobbering with usage of ExternalAddress
vdeshpande
parents: 42039
diff changeset
  1382
    movq(g, Address(CTX, 8 * 6));
42039
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1383
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1384
    bind(loop0);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1385
    lea(TBL, ExternalAddress(K512_W));
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1386
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1387
    //byte swap first 16 qwords (one 128-byte input block)
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1388
    vmovdqu(xmm4, Address(INP, 32 * 0));
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1389
    vpshufb(xmm4, xmm4, BYTE_FLIP_MASK, AVX_256bit);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1390
    vmovdqu(xmm5, Address(INP, 32 * 1));
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1391
    vpshufb(xmm5, xmm5, BYTE_FLIP_MASK, AVX_256bit);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1392
    vmovdqu(xmm6, Address(INP, 32 * 2));
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1393
    vpshufb(xmm6, xmm6, BYTE_FLIP_MASK, AVX_256bit);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1394
    vmovdqu(xmm7, Address(INP, 32 * 3));
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1395
    vpshufb(xmm7, xmm7, BYTE_FLIP_MASK, AVX_256bit);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1396
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1397
    movq(Address(rsp, _INP), INP);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1398
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1399
    movslq(Address(rsp, _SRND), 4);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1400
    align(16);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1401
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1402
    //Schedule 64 input qwords, by calling sha512_AVX2_one_round_and_schedule
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1403
    bind(loop1);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1404
    vpaddq(xmm0, xmm4, Address(TBL, 0 * 32), AVX_256bit);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1405
    vmovdqu(Address(rsp, _XFER), xmm0);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1406
    //four rounds and schedule
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1407
    sha512_AVX2_one_round_and_schedule(xmm4, xmm5, xmm6, xmm7, a, b, c, d, e, f, g, h, 0);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1408
    sha512_AVX2_one_round_and_schedule(xmm4, xmm5, xmm6, xmm7, h, a, b, c, d, e, f, g, 1);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1409
    sha512_AVX2_one_round_and_schedule(xmm4, xmm5, xmm6, xmm7, g, h, a, b, c, d, e, f, 2);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1410
    sha512_AVX2_one_round_and_schedule(xmm4, xmm5, xmm6, xmm7, f, g, h, a, b, c, d, e, 3);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1411
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1412
    vpaddq(xmm0, xmm5, Address(TBL, 1 * 32), AVX_256bit);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1413
    vmovdqu(Address(rsp, _XFER), xmm0);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1414
    //four rounds and schedule
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1415
    sha512_AVX2_one_round_and_schedule(xmm5, xmm6, xmm7, xmm4, e, f, g, h, a, b, c, d, 0);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1416
    sha512_AVX2_one_round_and_schedule(xmm5, xmm6, xmm7, xmm4, d, e, f, g, h, a, b, c, 1);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1417
    sha512_AVX2_one_round_and_schedule(xmm5, xmm6, xmm7, xmm4, c, d, e, f, g, h, a, b, 2);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1418
    sha512_AVX2_one_round_and_schedule(xmm5, xmm6, xmm7, xmm4, b, c, d, e, f, g, h, a, 3);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1419
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1420
    vpaddq(xmm0, xmm6, Address(TBL, 2 * 32), AVX_256bit);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1421
    vmovdqu(Address(rsp, _XFER), xmm0);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1422
    //four rounds and schedule
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1423
    sha512_AVX2_one_round_and_schedule(xmm6, xmm7, xmm4, xmm5, a, b, c, d, e, f, g, h, 0);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1424
    sha512_AVX2_one_round_and_schedule(xmm6, xmm7, xmm4, xmm5, h, a, b, c, d, e, f, g, 1);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1425
    sha512_AVX2_one_round_and_schedule(xmm6, xmm7, xmm4, xmm5, g, h, a, b, c, d, e, f, 2);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1426
    sha512_AVX2_one_round_and_schedule(xmm6, xmm7, xmm4, xmm5, f, g, h, a, b, c, d, e, 3);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1427
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1428
    vpaddq(xmm0, xmm7, Address(TBL, 3 * 32), AVX_256bit);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1429
    vmovdqu(Address(rsp, _XFER), xmm0);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1430
    addq(TBL, 4 * 32);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1431
    //four rounds and schedule
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1432
    sha512_AVX2_one_round_and_schedule(xmm7, xmm4, xmm5, xmm6, e, f, g, h, a, b, c, d, 0);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1433
    sha512_AVX2_one_round_and_schedule(xmm7, xmm4, xmm5, xmm6, d, e, f, g, h, a, b, c, 1);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1434
    sha512_AVX2_one_round_and_schedule(xmm7, xmm4, xmm5, xmm6, c, d, e, f, g, h, a, b, 2);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1435
    sha512_AVX2_one_round_and_schedule(xmm7, xmm4, xmm5, xmm6, b, c, d, e, f, g, h, a, 3);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1436
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1437
    subq(Address(rsp, _SRND), 1);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1438
    jcc(Assembler::notEqual, loop1);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1439
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1440
    movslq(Address(rsp, _SRND), 2);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1441
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1442
    bind(loop2);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1443
    vpaddq(xmm0, xmm4, Address(TBL, 0 * 32), AVX_256bit);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1444
    vmovdqu(Address(rsp, _XFER), xmm0);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1445
    //four rounds and compute.
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1446
    sha512_AVX2_one_round_compute(a, a, b, c, d, e, f, g, h, 0);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1447
    sha512_AVX2_one_round_compute(h, h, a, b, c, d, e, f, g, 1);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1448
    sha512_AVX2_one_round_compute(g, g, h, a, b, c, d, e, f, 2);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1449
    sha512_AVX2_one_round_compute(f, f, g, h, a, b, c, d, e, 3);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1450
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1451
    vpaddq(xmm0, xmm5, Address(TBL, 1 * 32), AVX_256bit);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1452
    vmovdqu(Address(rsp, _XFER), xmm0);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1453
    addq(TBL, 2 * 32);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1454
    // four rounds and compute.
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1455
    sha512_AVX2_one_round_compute(e, e, f, g, h, a, b, c, d, 0);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1456
    sha512_AVX2_one_round_compute(d, d, e, f, g, h, a, b, c, 1);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1457
    sha512_AVX2_one_round_compute(c, c, d, e, f, g, h, a, b, 2);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1458
    sha512_AVX2_one_round_compute(b, b, c, d, e, f, g, h, a, 3);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1459
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1460
    vmovdqu(xmm4, xmm6);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1461
    vmovdqu(xmm5, xmm7);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1462
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1463
    subq(Address(rsp, _SRND), 1);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1464
    jcc(Assembler::notEqual, loop2);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1465
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1466
    addmq(8 * 0, CTX, a);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1467
    addmq(8 * 1, CTX, b);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1468
    addmq(8 * 2, CTX, c);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1469
    addmq(8 * 3, CTX, d);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1470
    addmq(8 * 4, CTX, e);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1471
    addmq(8 * 5, CTX, f);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1472
    addmq(8 * 6, CTX, g);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1473
    addmq(8 * 7, CTX, h);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1474
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1475
    movq(INP, Address(rsp, _INP));
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1476
    addq(INP, 128);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1477
    cmpq(INP, Address(rsp, _INP_END));
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1478
    jcc(Assembler::notEqual, loop0);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1479
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1480
    bind(done_hash);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1481
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1482
    //Restore GPRs
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1483
    movq(rbp, Address(rsp, (_GPR + 0)));
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1484
    movq(rbx, Address(rsp, (_GPR + 8)));
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1485
    movq(r12, Address(rsp, (_GPR + 16)));
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1486
    movq(r13, Address(rsp, (_GPR + 24)));
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1487
    movq(r14, Address(rsp, (_GPR + 32)));
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1488
    movq(r15, Address(rsp, (_GPR + 40)));
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1489
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1490
#ifdef _WIN64
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1491
    movq(rsi, Address(rsp, (_GPR + 48)));
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1492
    movq(rdi, Address(rsp, (_GPR + 56)));
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1493
#endif
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1494
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1495
    //Restore Stack Pointer
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1496
    movq(rsp, Address(rsp, _RSP));
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1497
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1498
#ifdef _WIN64
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1499
    pop(r9);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1500
    pop(r8);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1501
#else
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1502
    pop(rcx);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1503
    pop(rdx);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1504
#endif
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1505
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1506
    if (multi_block) {
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1507
#ifdef _WIN64
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1508
      const Register& limit_end = r9;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1509
      const Register& ofs_end = r8;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1510
#else
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1511
      const Register& limit_end = rcx;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1512
      const Register& ofs_end   = rdx;
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1513
#endif
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1514
      movq(rax, ofs_end);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1515
      bind(compute_size);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1516
      cmpptr(rax, limit_end);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1517
      jccb(Assembler::aboveEqual, compute_size_end);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1518
      addq(rax, 128);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1519
      jmpb(compute_size);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1520
      bind(compute_size_end);
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1521
    }
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1522
}
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1523
38135
e06e2d071465 8154495: SHA256 AVX2 intrinsic (when no supports_sha() available)
jcivlin
parents: 36555
diff changeset
  1524
#endif //#ifdef _LP64
42039
db627462f2c9 8165381: Update for x86 SHA512 using AVX2
kvn
parents: 41333
diff changeset
  1525