src/hotspot/cpu/ppc/macroAssembler_ppc_sha.cpp
author coleenp
Wed, 14 Aug 2019 10:07:00 -0400
changeset 57745 789e967c2731
parent 47565 f4962ab855b6
permissions -rw-r--r--
5103339: Strengthen NoSafepointVerifier Summary: Add NSV check at possible safepoint transition or places that could take out locks. Consolidate with clearing unhandled oops. Reviewed-by: dholmes, rehn
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
47565
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
     1
// Copyright (c) 2017 Instituto de Pesquisas Eldorado. All rights reserved.
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
     2
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
     3
//
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
     4
// This code is free software; you can redistribute it and/or modify it
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
     5
// under the terms of the GNU General Public License version 2 only, as
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
     6
// published by the Free Software Foundation.
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
     7
//
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
     8
// This code is distributed in the hope that it will be useful, but WITHOUT
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
     9
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    10
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    11
// version 2 for more details (a copy is included in the LICENSE file that
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    12
// accompanied this code).
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    13
//
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    14
// You should have received a copy of the GNU General Public License version
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    15
// 2 along with this work; if not, write to the Free Software Foundation,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    16
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    17
//
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    18
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    19
// or visit www.oracle.com if you need additional information or have any
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    20
// questions.
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    21
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    22
// Implemented according to "Descriptions of SHA-256, SHA-384, and SHA-512"
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    23
// (http://www.iwar.org.uk/comsec/resources/cipher/sha256-384-512.pdf).
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    24
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    25
#include "asm/macroAssembler.inline.hpp"
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    26
#include "runtime/stubRoutines.hpp"
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    27
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    28
/**********************************************************************
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    29
 * SHA 256
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    30
 *********************************************************************/
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    31
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    32
void MacroAssembler::sha256_deque(const VectorRegister src,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    33
                                  const VectorRegister dst1,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    34
                                  const VectorRegister dst2,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    35
                                  const VectorRegister dst3) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    36
  vsldoi (dst1, src, src, 12);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    37
  vsldoi (dst2, src, src, 8);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    38
  vsldoi (dst3, src, src, 4);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    39
}
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    40
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    41
void MacroAssembler::sha256_round(const VectorRegister* hs,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    42
                                  const int total_hs,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    43
                                  int& h_cnt,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    44
                                  const VectorRegister kpw) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    45
  // convenience registers: cycle from 0-7 downwards
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    46
  const VectorRegister a = hs[(total_hs + 0 - (h_cnt % total_hs)) % total_hs];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    47
  const VectorRegister b = hs[(total_hs + 1 - (h_cnt % total_hs)) % total_hs];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    48
  const VectorRegister c = hs[(total_hs + 2 - (h_cnt % total_hs)) % total_hs];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    49
  const VectorRegister d = hs[(total_hs + 3 - (h_cnt % total_hs)) % total_hs];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    50
  const VectorRegister e = hs[(total_hs + 4 - (h_cnt % total_hs)) % total_hs];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    51
  const VectorRegister f = hs[(total_hs + 5 - (h_cnt % total_hs)) % total_hs];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    52
  const VectorRegister g = hs[(total_hs + 6 - (h_cnt % total_hs)) % total_hs];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    53
  const VectorRegister h = hs[(total_hs + 7 - (h_cnt % total_hs)) % total_hs];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    54
  // temporaries
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    55
  VectorRegister ch  = VR0;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    56
  VectorRegister maj = VR1;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    57
  VectorRegister bsa = VR2;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    58
  VectorRegister bse = VR3;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    59
  VectorRegister vt0 = VR4;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    60
  VectorRegister vt1 = VR5;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    61
  VectorRegister vt2 = VR6;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    62
  VectorRegister vt3 = VR7;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    63
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    64
  vsel       (ch,  g,   f, e);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    65
  vxor       (maj, a,   b);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    66
  vshasigmaw (bse, e,   1, 0xf);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    67
  vadduwm    (vt2, ch,  kpw);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    68
  vadduwm    (vt1, h,   bse);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    69
  vsel       (maj, b,   c, maj);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    70
  vadduwm    (vt3, vt1, vt2);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    71
  vshasigmaw (bsa, a,   1, 0);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    72
  vadduwm    (vt0, bsa, maj);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    73
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    74
  vadduwm    (d,   d,   vt3);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    75
  vadduwm    (h,   vt3, vt0);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    76
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    77
  // advance vector pointer to the next iteration
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    78
  h_cnt++;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    79
}
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    80
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    81
void MacroAssembler::sha256_load_h_vec(const VectorRegister a,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    82
                                       const VectorRegister e,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    83
                                       const Register hptr) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    84
  // temporaries
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    85
  Register tmp = R8;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    86
  VectorRegister vt0 = VR0;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    87
  VectorRegister vRb = VR6;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    88
  // labels
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    89
  Label sha256_aligned;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    90
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    91
  andi_  (tmp,  hptr, 0xf);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    92
  lvx    (a,    hptr);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    93
  addi   (tmp,  hptr, 16);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    94
  lvx    (e,    tmp);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    95
  beq    (CCR0, sha256_aligned);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    96
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    97
  // handle unaligned accesses
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    98
  load_perm(vRb, hptr);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
    99
  addi   (tmp, hptr, 32);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   100
  vec_perm(a,   e,    vRb);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   101
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   102
  lvx    (vt0,  tmp);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   103
  vec_perm(e,   vt0,  vRb);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   104
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   105
  // aligned accesses
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   106
  bind(sha256_aligned);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   107
}
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   108
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   109
void MacroAssembler::sha256_load_w_plus_k_vec(const Register buf_in,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   110
                                              const VectorRegister* ws,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   111
                                              const int total_ws,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   112
                                              const Register k,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   113
                                              const VectorRegister* kpws,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   114
                                              const int total_kpws) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   115
  Label w_aligned, after_w_load;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   116
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   117
  Register tmp       = R8;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   118
  VectorRegister vt0 = VR0;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   119
  VectorRegister vt1 = VR1;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   120
  VectorRegister vRb = VR6;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   121
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   122
  andi_ (tmp, buf_in, 0xF);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   123
  beq   (CCR0, w_aligned); // address ends with 0x0, not 0x8
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   124
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   125
  // deal with unaligned addresses
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   126
  lvx    (ws[0], buf_in);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   127
  load_perm(vRb, buf_in);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   128
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   129
  for (int n = 1; n < total_ws; n++) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   130
    VectorRegister w_cur = ws[n];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   131
    VectorRegister w_prev = ws[n-1];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   132
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   133
    addi (tmp, buf_in, n * 16);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   134
    lvx  (w_cur, tmp);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   135
    vec_perm(w_prev, w_cur, vRb);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   136
  }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   137
  addi   (tmp, buf_in, total_ws * 16);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   138
  lvx    (vt0, tmp);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   139
  vec_perm(ws[total_ws-1], vt0, vRb);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   140
  b      (after_w_load);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   141
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   142
  bind(w_aligned);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   143
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   144
  // deal with aligned addresses
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   145
  lvx(ws[0], buf_in);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   146
  for (int n = 1; n < total_ws; n++) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   147
    VectorRegister w = ws[n];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   148
    addi (tmp, buf_in, n * 16);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   149
    lvx  (w, tmp);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   150
  }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   151
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   152
  bind(after_w_load);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   153
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   154
#if defined(VM_LITTLE_ENDIAN)
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   155
  // Byte swapping within int values
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   156
  li       (tmp, 8);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   157
  lvsl     (vt0, tmp);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   158
  vspltisb (vt1, 0xb);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   159
  vxor     (vt1, vt0, vt1);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   160
  for (int n = 0; n < total_ws; n++) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   161
    VectorRegister w = ws[n];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   162
    vec_perm(w, w, vt1);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   163
  }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   164
#endif
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   165
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   166
  // Loading k, which is always aligned to 16-bytes
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   167
  lvx    (kpws[0], k);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   168
  for (int n = 1; n < total_kpws; n++) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   169
    VectorRegister kpw = kpws[n];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   170
    addi (tmp, k, 16 * n);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   171
    lvx  (kpw, tmp);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   172
  }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   173
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   174
  // Add w to K
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   175
  assert(total_ws == total_kpws, "Redesign the loop below");
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   176
  for (int n = 0; n < total_kpws; n++) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   177
    VectorRegister kpw = kpws[n];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   178
    VectorRegister w   = ws[n];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   179
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   180
    vadduwm  (kpw, kpw, w);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   181
  }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   182
}
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   183
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   184
void MacroAssembler::sha256_calc_4w(const VectorRegister w0,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   185
                                    const VectorRegister w1,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   186
                                    const VectorRegister w2,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   187
                                    const VectorRegister w3,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   188
                                    const VectorRegister kpw0,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   189
                                    const VectorRegister kpw1,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   190
                                    const VectorRegister kpw2,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   191
                                    const VectorRegister kpw3,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   192
                                    const Register j,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   193
                                    const Register k) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   194
  // Temporaries
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   195
  const VectorRegister  vt0  = VR0;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   196
  const VectorRegister  vt1  = VR1;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   197
  const VectorSRegister vsrt1 = vt1->to_vsr();
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   198
  const VectorRegister  vt2  = VR2;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   199
  const VectorRegister  vt3  = VR3;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   200
  const VectorSRegister vst3 = vt3->to_vsr();
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   201
  const VectorRegister  vt4  = VR4;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   202
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   203
  // load to k[j]
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   204
  lvx        (vt0, j,   k);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   205
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   206
  // advance j
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   207
  addi       (j,   j,   16); // 16 bytes were read
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   208
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   209
#if defined(VM_LITTLE_ENDIAN)
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   210
  // b = w[j-15], w[j-14], w[j-13], w[j-12]
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   211
  vsldoi     (vt1, w1,  w0, 12);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   212
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   213
  // c = w[j-7], w[j-6], w[j-5], w[j-4]
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   214
  vsldoi     (vt2, w3,  w2, 12);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   215
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   216
#else
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   217
  // b = w[j-15], w[j-14], w[j-13], w[j-12]
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   218
  vsldoi     (vt1, w0,  w1, 4);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   219
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   220
  // c = w[j-7], w[j-6], w[j-5], w[j-4]
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   221
  vsldoi     (vt2, w2,  w3, 4);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   222
#endif
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   223
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   224
  // d = w[j-2], w[j-1], w[j-4], w[j-3]
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   225
  vsldoi     (vt3, w3,  w3, 8);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   226
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   227
  // b = s0(w[j-15]) , s0(w[j-14]) , s0(w[j-13]) , s0(w[j-12])
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   228
  vshasigmaw (vt1, vt1, 0,  0);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   229
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   230
  // d = s1(w[j-2]) , s1(w[j-1]) , s1(w[j-4]) , s1(w[j-3])
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   231
  vshasigmaw (vt3, vt3, 0,  0xf);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   232
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   233
  // c = s0(w[j-15]) + w[j-7],
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   234
  //     s0(w[j-14]) + w[j-6],
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   235
  //     s0(w[j-13]) + w[j-5],
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   236
  //     s0(w[j-12]) + w[j-4]
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   237
  vadduwm    (vt2, vt1, vt2);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   238
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   239
  // c = s0(w[j-15]) + w[j-7] + w[j-16],
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   240
  //     s0(w[j-14]) + w[j-6] + w[j-15],
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   241
  //     s0(w[j-13]) + w[j-5] + w[j-14],
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   242
  //     s0(w[j-12]) + w[j-4] + w[j-13]
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   243
  vadduwm    (vt2, vt2, w0);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   244
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   245
  // e = s0(w[j-15]) + w[j-7] + w[j-16] + s1(w[j-2]), // w[j]
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   246
  //     s0(w[j-14]) + w[j-6] + w[j-15] + s1(w[j-1]), // w[j+1]
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   247
  //     s0(w[j-13]) + w[j-5] + w[j-14] + s1(w[j-4]), // UNDEFINED
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   248
  //     s0(w[j-12]) + w[j-4] + w[j-13] + s1(w[j-3])  // UNDEFINED
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   249
  vadduwm    (vt4, vt2, vt3);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   250
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   251
  // At this point, e[0] and e[1] are the correct values to be stored at w[j]
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   252
  // and w[j+1].
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   253
  // e[2] and e[3] are not considered.
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   254
  // b = s1(w[j]) , s1(s(w[j+1]) , UNDEFINED , UNDEFINED
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   255
  vshasigmaw (vt1, vt4, 0,  0xf);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   256
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   257
  // v5 = s1(w[j-2]) , s1(w[j-1]) , s1(w[j]) , s1(w[j+1])
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   258
#if defined(VM_LITTLE_ENDIAN)
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   259
  xxmrgld    (vst3, vsrt1, vst3);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   260
#else
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   261
  xxmrghd    (vst3, vst3, vsrt1);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   262
#endif
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   263
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   264
  // c = s0(w[j-15]) + w[j-7] + w[j-16] + s1(w[j-2]), // w[j]
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   265
  //     s0(w[j-14]) + w[j-6] + w[j-15] + s1(w[j-1]), // w[j+1]
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   266
  //     s0(w[j-13]) + w[j-5] + w[j-14] + s1(w[j]),   // w[j+2]
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   267
  //     s0(w[j-12]) + w[j-4] + w[j-13] + s1(w[j+1])  // w[j+3]
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   268
  vadduwm    (vt2, vt2, vt3);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   269
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   270
  // Updating w0 to w3 to hold the new previous 16 values from w.
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   271
  vmr        (w0,  w1);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   272
  vmr        (w1,  w2);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   273
  vmr        (w2,  w3);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   274
  vmr        (w3,  vt2);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   275
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   276
  // store k + w (4 values at once) in kpw0-kpw3; three of them are byte-rotated copies
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   277
#if defined(VM_LITTLE_ENDIAN)
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   278
  vadduwm    (kpw0, vt2, vt0);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   279
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   280
  vsldoi     (kpw1, kpw0, kpw0, 12);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   281
  vsldoi     (kpw2, kpw0, kpw0, 8);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   282
  vsldoi     (kpw3, kpw0, kpw0, 4);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   283
#else
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   284
  vadduwm    (kpw3, vt2, vt0);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   285
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   286
  vsldoi     (kpw2, kpw3, kpw3, 12);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   287
  vsldoi     (kpw1, kpw3, kpw3, 8);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   288
  vsldoi     (kpw0, kpw3, kpw3, 4);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   289
#endif
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   290
}
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   291
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   292
// Emits code that adds the working variables a..h to the hash state words
// hptr[0]..hptr[7] and writes the sums back to hptr.
//
//   a, b_, c, d, e, f, g, h - vector registers holding the working variables
//   hptr                    - register holding the address of the state words
//
// Little endian build: the sums end up in a (hptr[0..3]) and e (hptr[4..7]).
// Big endian build:    the sums end up in d (hptr[0..3]) and h (hptr[4..7]).
//
// Temporaries used by the emitted code: VR0-VR5 and R8 always; VR6 and R9 only
// on the unaligned path. The alignment test is consumed through CCR0.
void MacroAssembler::sha256_update_sha_state(const VectorRegister a,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   293
                                             const VectorRegister b_,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   294
                                             const VectorRegister c,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   295
                                             const VectorRegister d,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   296
                                             const VectorRegister e,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   297
                                             const VectorRegister f,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   298
                                             const VectorRegister g,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   299
                                             const VectorRegister h,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   300
                                             const Register hptr) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   301
  // temporaries
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   302
  VectorRegister vt0  = VR0;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   303
  VectorRegister vt1  = VR1;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   304
  VectorRegister vt2  = VR2;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   305
  VectorRegister vt3  = VR3;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   306
  VectorRegister vt4  = VR4;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   307
  VectorRegister vt5  = VR5;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   308
  VectorRegister vaux = VR6;  // not referenced below; same register as vRb
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   309
  VectorRegister vRb  = VR6;  // permute control vector for the unaligned path
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   310
  Register tmp        = R8;   // shares R8 with of16 (see the note at andi_ below)
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   311
  Register of16       = R8;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   312
  Register of32       = R9;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   313
  Label state_load_aligned;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   314
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   315
  // Load hptr
  // Only the condition produced by this alignment test (low 4 address bits) is
  // needed later, by the beq on CCR0; the value written to tmp is not, so R8 is
  // immediately reused as the constant offset 16.
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   316
  andi_   (tmp, hptr, 0xf);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   317
  li      (of16, 16);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   318
  // Both vectors are loaded before the branch; the unaligned path below fixes
  // them up with vec_perm and one additional load.
  lvx     (vt0, hptr);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   319
  lvx     (vt5, of16, hptr);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   320
  beq     (CCR0, state_load_aligned);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   321
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   322
  // handle unaligned accesses
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   323
  li      (of32, 32);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   324
  load_perm(vRb, hptr);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   325
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   326
  vec_perm(vt0, vt5,  vRb);        // vt0 = hptr[0]..hptr[3]
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   327
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   328
  // third load (offset 32) supplies the remainder needed to assemble hptr[4]..hptr[7]
  lvx     (vt1, hptr, of32);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   329
  vec_perm(vt5, vt1,  vRb);        // vt5 = hptr[4]..hptr[7]
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   330
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   331
  // aligned accesses
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   332
  bind(state_load_aligned);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   333
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   334
  // From here on vt0 = hptr[0..3] and vt5 = hptr[4..7] on both paths.
  // Gather the eight working variables into two vectors, then add the old state.
#if defined(VM_LITTLE_ENDIAN)
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   335
  vmrglw  (vt1, b_, a);            // vt1 = {a, b, ?, ?}
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   336
  vmrglw  (vt2, d, c);             // vt2 = {c, d, ?, ?}
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   337
  vmrglw  (vt3, f, e);             // vt3 = {e, f, ?, ?}
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   338
  vmrglw  (vt4, h, g);             // vt4 = {g, h, ?, ?}
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   339
  xxmrgld (vt1->to_vsr(), vt2->to_vsr(), vt1->to_vsr()); // vt1 = {a, b, c, d}
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   340
  xxmrgld (vt3->to_vsr(), vt4->to_vsr(), vt3->to_vsr()); // vt3 = {e, f, g, h}
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   341
  vadduwm (a,   vt0, vt1);         // a = {a+hptr[0], b+hptr[1], c+hptr[2], d+hptr[3]}
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   342
  vadduwm (e,   vt5, vt3);         // e = {e+hptr[4], f+hptr[5], g+hptr[6], h+hptr[7]}
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   343
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   344
  // Save hptr back, works for any alignment
  // (the doublewords are swapped into vt0/vt5 first, so a and e keep the sums)
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   345
  xxswapd (vt0->to_vsr(), a->to_vsr());
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   346
  stxvd2x (vt0->to_vsr(), hptr);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   347
  xxswapd (vt5->to_vsr(), e->to_vsr());
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   348
  stxvd2x (vt5->to_vsr(), of16, hptr);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   349
#else
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   350
  vmrglw  (vt1, a, b_);            // vt1 = {a, b, ?, ?}
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   351
  vmrglw  (vt2, c, d);             // vt2 = {c, d, ?, ?}
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   352
  vmrglw  (vt3, e, f);             // vt3 = {e, f, ?, ?}
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   353
  vmrglw  (vt4, g, h);             // vt4 = {g, h, ?, ?}
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   354
  xxmrgld (vt1->to_vsr(), vt1->to_vsr(), vt2->to_vsr()); // vt1 = {a, b, c, d}
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   355
  xxmrgld (vt3->to_vsr(), vt3->to_vsr(), vt4->to_vsr()); // vt3 = {e, f, g, h}
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   356
  vadduwm (d,   vt0, vt1);         // d = {a+hptr[0], b+hptr[1], c+hptr[2], d+hptr[3]}
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   357
  vadduwm (h,   vt5, vt3);         // h = {e+hptr[4], f+hptr[5], g+hptr[6], h+hptr[7]}
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   358
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   359
  // Save hptr back, works for any alignment
  // (no doubleword swap on this path; d and h are stored directly)
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   360
  stxvd2x (d->to_vsr(), hptr);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   361
  stxvd2x (h->to_vsr(), of16, hptr);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   362
#endif
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   363
}
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   364
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   365
// The 64 SHA-256 round constants K[0]..K[63] (FIPS 180-4, section 4.2.2), one
// 32-bit word each. 16-byte alignment is requested here; sha256() asserts it on
// the pointer it actually loads from (sha256_round_consts).
static const uint32_t sha256_round_table[64] __attribute((aligned(16))) = {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   366
  0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   367
  0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   368
  0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   369
  0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   370
  0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   371
  0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   372
  0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   373
  0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   374
  0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   375
  0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   376
  0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   377
  0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   378
  0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   379
  0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   380
  0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   381
  0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   382
};
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   383
// Pointer through which sha256() reads the round constants. The pointer itself
// is deliberately non-const: under AIX, sha256() redirects it to a malloc'ed
// copy of the table when the static table is not 16-byte aligned.
static const uint32_t *sha256_round_consts = sha256_round_table;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   384
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   385
//   R3_ARG1   - byte[]  Input string with padding but in Big Endian
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   386
//   R4_ARG2   - int[]   SHA.state (initially, values derived from the square roots of primes)
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   387
//   R5_ARG3   - int     offset
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   388
//   R6_ARG4   - int     limit
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   389
//
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   390
//   Internal Register usage:
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   391
//   R7        - k
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   392
//   R8        - tmp | j | of16
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   393
//   R9        - of32
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   394
//   VR0-VR8   - ch, maj, bsa, bse, vt0-vt3 | vt0-vt5, vaux/vRb
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   395
//   VR9-VR16  - a-h
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   396
//   VR17-VR20 - w0-w3
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   397
//   VR21-VR23 - vRb | vaux0-vaux2
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   398
//   VR24-VR27 - kpw0-kpw3
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   399
void MacroAssembler::sha256(bool multi_block) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   400
  static const ssize_t buf_size = 64;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   401
  static const uint8_t w_size = sizeof(sha256_round_table)/sizeof(uint32_t);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   402
#ifdef AIX
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   403
  // malloc provides 16 byte alignment
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   404
  if (((uintptr_t)sha256_round_consts & 0xF) != 0) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   405
    uint32_t *new_round_consts = (uint32_t*)malloc(sizeof(sha256_round_table));
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   406
    guarantee(new_round_consts, "oom");
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   407
    memcpy(new_round_consts, sha256_round_consts, sizeof(sha256_round_table));
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   408
    sha256_round_consts = (const uint32_t*)new_round_consts;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   409
  }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   410
#endif
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   411
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   412
  Register buf_in = R3_ARG1;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   413
  Register state  = R4_ARG2;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   414
  Register ofs    = R5_ARG3;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   415
  Register limit  = R6_ARG4;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   416
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   417
  Label sha_loop, core_loop;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   418
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   419
  // Save non-volatile vector registers in the red zone
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   420
  static const VectorRegister nv[] = {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   421
    VR20, VR21, VR22, VR23, VR24, VR25, VR26, VR27/*, VR28, VR29, VR30, VR31*/
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   422
  };
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   423
  static const uint8_t nv_size = sizeof(nv) / sizeof (VectorRegister);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   424
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   425
  for (int c = 0; c < nv_size; c++) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   426
    Register tmp = R8;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   427
    li  (tmp, (c - (nv_size)) * 16);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   428
    stvx(nv[c], tmp, R1);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   429
  }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   430
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   431
  // Load hash state to registers
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   432
  VectorRegister a = VR9;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   433
  VectorRegister b = VR10;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   434
  VectorRegister c = VR11;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   435
  VectorRegister d = VR12;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   436
  VectorRegister e = VR13;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   437
  VectorRegister f = VR14;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   438
  VectorRegister g = VR15;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   439
  VectorRegister h = VR16;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   440
  static const VectorRegister hs[] = {a, b, c, d, e, f, g, h};
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   441
  static const int total_hs = sizeof(hs)/sizeof(VectorRegister);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   442
  // counter for cycling through hs vector to avoid register moves between iterations
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   443
  int h_cnt = 0;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   444
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   445
  // Load a-h registers from the memory pointed by state
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   446
#if defined(VM_LITTLE_ENDIAN)
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   447
  sha256_load_h_vec(a, e, state);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   448
#else
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   449
  sha256_load_h_vec(d, h, state);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   450
#endif
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   451
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   452
  // keep k loaded also during MultiBlock loops
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   453
  Register k = R7;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   454
  assert(((uintptr_t)sha256_round_consts & 0xF) == 0, "k alignment");
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   455
  load_const_optimized(k, (address)sha256_round_consts, R0);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   456
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   457
  // Avoiding redundant loads
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   458
  if (multi_block) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   459
    align(OptoLoopAlignment);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   460
  }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   461
  bind(sha_loop);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   462
#if defined(VM_LITTLE_ENDIAN)
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   463
  sha256_deque(a, b, c, d);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   464
  sha256_deque(e, f, g, h);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   465
#else
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   466
  sha256_deque(d, c, b, a);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   467
  sha256_deque(h, g, f, e);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   468
#endif
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   469
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   470
  // Load 16 elements from w out of the loop.
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   471
  // Order of the int values is endianness-specific.
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   472
  VectorRegister w0 = VR17;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   473
  VectorRegister w1 = VR18;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   474
  VectorRegister w2 = VR19;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   475
  VectorRegister w3 = VR20;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   476
  static const VectorRegister ws[] = {w0, w1, w2, w3};
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   477
  static const int total_ws = sizeof(ws)/sizeof(VectorRegister);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   478
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   479
  VectorRegister kpw0 = VR24;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   480
  VectorRegister kpw1 = VR25;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   481
  VectorRegister kpw2 = VR26;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   482
  VectorRegister kpw3 = VR27;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   483
  static const VectorRegister kpws[] = {kpw0, kpw1, kpw2, kpw3};
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   484
  static const int total_kpws = sizeof(kpws)/sizeof(VectorRegister);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   485
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   486
  sha256_load_w_plus_k_vec(buf_in, ws, total_ws, k, kpws, total_kpws);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   487
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   488
  // Cycle through the first 16 elements
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   489
  assert(total_ws == total_kpws, "Redesign the loop below");
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   490
  for (int n = 0; n < total_ws; n++) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   491
    VectorRegister vaux0 = VR21;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   492
    VectorRegister vaux1 = VR22;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   493
    VectorRegister vaux2 = VR23;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   494
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   495
    sha256_deque(kpws[n], vaux0, vaux1, vaux2);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   496
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   497
#if defined(VM_LITTLE_ENDIAN)
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   498
    sha256_round(hs, total_hs, h_cnt, kpws[n]);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   499
    sha256_round(hs, total_hs, h_cnt, vaux0);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   500
    sha256_round(hs, total_hs, h_cnt, vaux1);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   501
    sha256_round(hs, total_hs, h_cnt, vaux2);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   502
#else
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   503
    sha256_round(hs, total_hs, h_cnt, vaux2);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   504
    sha256_round(hs, total_hs, h_cnt, vaux1);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   505
    sha256_round(hs, total_hs, h_cnt, vaux0);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   506
    sha256_round(hs, total_hs, h_cnt, kpws[n]);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   507
#endif
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   508
  }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   509
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   510
  Register tmp = R8;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   511
  // loop the 16th to the 64th iteration by 8 steps
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   512
  li   (tmp, (w_size - 16) / total_hs);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   513
  mtctr(tmp);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   514
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   515
  // j will be aligned to 4 for loading words.
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   516
  // Whenever read, advance the pointer (e.g: when j is used in a function)
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   517
  Register j = R8;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   518
  li   (j, 16*4);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   519
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   520
  align(OptoLoopAlignment);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   521
  bind(core_loop);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   522
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   523
  // due to VectorRegister rotate, always iterate in multiples of total_hs
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   524
  for (int n = 0; n < total_hs/4; n++) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   525
    sha256_calc_4w(w0, w1, w2, w3, kpw0, kpw1, kpw2, kpw3, j, k);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   526
    sha256_round(hs, total_hs, h_cnt, kpw0);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   527
    sha256_round(hs, total_hs, h_cnt, kpw1);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   528
    sha256_round(hs, total_hs, h_cnt, kpw2);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   529
    sha256_round(hs, total_hs, h_cnt, kpw3);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   530
  }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   531
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   532
  bdnz   (core_loop);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   533
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   534
  // Update hash state
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   535
  sha256_update_sha_state(a, b, c, d, e, f, g, h, state);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   536
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   537
  if (multi_block) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   538
    addi(buf_in, buf_in, buf_size);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   539
    addi(ofs, ofs, buf_size);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   540
    cmplw(CCR0, ofs, limit);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   541
    ble(CCR0, sha_loop);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   542
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   543
    // return ofs
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   544
    mr(R3_RET, ofs);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   545
  }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   546
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   547
  // Restore non-volatile registers
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   548
  for (int c = 0; c < nv_size; c++) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   549
    Register tmp = R8;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   550
    li  (tmp, (c - (nv_size)) * 16);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   551
    lvx(nv[c], tmp, R1);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   552
  }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   553
}
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   554
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   555
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   556
/**********************************************************************
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   557
 * SHA 512
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   558
 *********************************************************************/
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   559
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   560
void MacroAssembler::sha512_load_w_vec(const Register buf_in,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   561
                                       const VectorRegister* ws,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   562
                                       const int total_ws) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   563
  Register tmp       = R8;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   564
  VectorRegister vRb = VR8;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   565
  VectorRegister aux = VR9;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   566
  Label is_aligned, after_alignment;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   567
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   568
  andi_  (tmp, buf_in, 0xF);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   569
  beq    (CCR0, is_aligned); // address ends with 0x0, not 0x8
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   570
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   571
  // deal with unaligned addresses
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   572
  lvx    (ws[0], buf_in);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   573
  load_perm(vRb, buf_in);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   574
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   575
  for (int n = 1; n < total_ws; n++) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   576
    VectorRegister w_cur = ws[n];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   577
    VectorRegister w_prev = ws[n-1];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   578
    addi (tmp, buf_in, n * 16);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   579
    lvx  (w_cur, tmp);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   580
    vec_perm(w_prev, w_cur, vRb);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   581
  }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   582
  addi   (tmp, buf_in, total_ws * 16);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   583
  lvx    (aux, tmp);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   584
  vec_perm(ws[total_ws-1], aux, vRb);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   585
  b      (after_alignment);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   586
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   587
  bind(is_aligned);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   588
  lvx  (ws[0], buf_in);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   589
  for (int n = 1; n < total_ws; n++) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   590
    VectorRegister w = ws[n];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   591
    addi (tmp, buf_in, n * 16);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   592
    lvx  (w, tmp);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   593
  }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   594
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   595
  bind(after_alignment);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   596
}
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   597
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   598
// Update hash state
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   599
void MacroAssembler::sha512_update_sha_state(const Register state,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   600
                                             const VectorRegister* hs,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   601
                                             const int total_hs) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   602
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   603
#if defined(VM_LITTLE_ENDIAN)
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   604
  int start_idx = 0;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   605
#else
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   606
  int start_idx = 1;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   607
#endif
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   608
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   609
  // load initial hash from the memory pointed by state
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   610
  VectorRegister ini_a = VR10;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   611
  VectorRegister ini_c = VR12;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   612
  VectorRegister ini_e = VR14;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   613
  VectorRegister ini_g = VR16;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   614
  static const VectorRegister inis[] = {ini_a, ini_c, ini_e, ini_g};
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   615
  static const int total_inis = sizeof(inis)/sizeof(VectorRegister);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   616
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   617
  Label state_save_aligned, after_state_save_aligned;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   618
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   619
  Register addr      = R7;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   620
  Register tmp       = R8;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   621
  VectorRegister vRb = VR8;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   622
  VectorRegister aux = VR9;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   623
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   624
  andi_(tmp, state, 0xf);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   625
  beq(CCR0, state_save_aligned);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   626
  // deal with unaligned addresses
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   627
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   628
  {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   629
    VectorRegister a = hs[0];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   630
    VectorRegister b_ = hs[1];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   631
    VectorRegister c = hs[2];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   632
    VectorRegister d = hs[3];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   633
    VectorRegister e = hs[4];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   634
    VectorRegister f = hs[5];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   635
    VectorRegister g = hs[6];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   636
    VectorRegister h = hs[7];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   637
    load_perm(vRb, state);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   638
    lvx    (ini_a, state);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   639
    addi   (addr, state, 16);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   640
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   641
    lvx    (ini_c, addr);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   642
    addi   (addr, state, 32);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   643
    vec_perm(ini_a, ini_c, vRb);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   644
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   645
    lvx    (ini_e, addr);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   646
    addi   (addr, state, 48);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   647
    vec_perm(ini_c, ini_e, vRb);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   648
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   649
    lvx    (ini_g, addr);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   650
    addi   (addr, state, 64);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   651
    vec_perm(ini_e, ini_g, vRb);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   652
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   653
    lvx    (aux, addr);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   654
    vec_perm(ini_g, aux, vRb);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   655
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   656
#if defined(VM_LITTLE_ENDIAN)
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   657
    xxmrgld(a->to_vsr(), b_->to_vsr(), a->to_vsr());
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   658
    xxmrgld(c->to_vsr(), d->to_vsr(), c->to_vsr());
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   659
    xxmrgld(e->to_vsr(), f->to_vsr(), e->to_vsr());
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   660
    xxmrgld(g->to_vsr(), h->to_vsr(), g->to_vsr());
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   661
#else
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   662
    xxmrgld(b_->to_vsr(), a->to_vsr(), b_->to_vsr());
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   663
    xxmrgld(d->to_vsr(), c->to_vsr(), d->to_vsr());
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   664
    xxmrgld(f->to_vsr(), e->to_vsr(), f->to_vsr());
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   665
    xxmrgld(h->to_vsr(), g->to_vsr(), h->to_vsr());
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   666
#endif
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   667
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   668
    for (int n = start_idx; n < total_hs; n += 2) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   669
      VectorRegister h_cur = hs[n];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   670
      VectorRegister ini_cur = inis[n/2];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   671
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   672
      vaddudm(h_cur, ini_cur, h_cur);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   673
    }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   674
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   675
    for (int n = start_idx; n < total_hs; n += 2) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   676
      VectorRegister h_cur = hs[n];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   677
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   678
      mfvrd  (tmp, h_cur);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   679
#if defined(VM_LITTLE_ENDIAN)
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   680
      std    (tmp, 8*n + 8, state);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   681
#else
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   682
      std    (tmp, 8*n - 8, state);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   683
#endif
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   684
      vsldoi (aux, h_cur, h_cur, 8);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   685
      mfvrd  (tmp, aux);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   686
      std    (tmp, 8*n + 0, state);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   687
    }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   688
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   689
    b      (after_state_save_aligned);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   690
  }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   691
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   692
  bind(state_save_aligned);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   693
  {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   694
    for (int n = 0; n < total_hs; n += 2) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   695
#if defined(VM_LITTLE_ENDIAN)
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   696
      VectorRegister h_cur = hs[n];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   697
      VectorRegister h_next = hs[n+1];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   698
#else
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   699
      VectorRegister h_cur = hs[n+1];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   700
      VectorRegister h_next = hs[n];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   701
#endif
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   702
      VectorRegister ini_cur = inis[n/2];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   703
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   704
      if (n/2 == 0) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   705
        lvx(ini_cur, state);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   706
      } else {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   707
        addi(addr, state, (n/2) * 16);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   708
        lvx(ini_cur, addr);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   709
      }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   710
      xxmrgld(h_cur->to_vsr(), h_next->to_vsr(), h_cur->to_vsr());
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   711
    }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   712
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   713
    for (int n = start_idx; n < total_hs; n += 2) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   714
      VectorRegister h_cur = hs[n];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   715
      VectorRegister ini_cur = inis[n/2];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   716
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   717
      vaddudm(h_cur, ini_cur, h_cur);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   718
    }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   719
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   720
    for (int n = start_idx; n < total_hs; n += 2) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   721
      VectorRegister h_cur = hs[n];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   722
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   723
      if (n/2 == 0) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   724
        stvx(h_cur, state);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   725
      } else {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   726
        addi(addr, state, (n/2) * 16);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   727
        stvx(h_cur, addr);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   728
      }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   729
    }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   730
  }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   731
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   732
  bind(after_state_save_aligned);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   733
}
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   734
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   735
// Use h_cnt to cycle through hs elements but also increment it at the end
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   736
void MacroAssembler::sha512_round(const VectorRegister* hs,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   737
                                  const int total_hs, int& h_cnt,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   738
                                  const VectorRegister kpw) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   739
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   740
  // convenience registers: cycle from 0-7 downwards
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   741
  const VectorRegister a = hs[(total_hs + 0 - (h_cnt % total_hs)) % total_hs];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   742
  const VectorRegister b = hs[(total_hs + 1 - (h_cnt % total_hs)) % total_hs];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   743
  const VectorRegister c = hs[(total_hs + 2 - (h_cnt % total_hs)) % total_hs];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   744
  const VectorRegister d = hs[(total_hs + 3 - (h_cnt % total_hs)) % total_hs];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   745
  const VectorRegister e = hs[(total_hs + 4 - (h_cnt % total_hs)) % total_hs];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   746
  const VectorRegister f = hs[(total_hs + 5 - (h_cnt % total_hs)) % total_hs];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   747
  const VectorRegister g = hs[(total_hs + 6 - (h_cnt % total_hs)) % total_hs];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   748
  const VectorRegister h = hs[(total_hs + 7 - (h_cnt % total_hs)) % total_hs];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   749
  // temporaries
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   750
  const VectorRegister Ch   = VR20;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   751
  const VectorRegister Maj  = VR21;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   752
  const VectorRegister bsa  = VR22;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   753
  const VectorRegister bse  = VR23;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   754
  const VectorRegister tmp1 = VR24;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   755
  const VectorRegister tmp2 = VR25;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   756
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   757
  vsel      (Ch,   g,    f,   e);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   758
  vxor      (Maj,  a,    b);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   759
  vshasigmad(bse,  e,    1,   0xf);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   760
  vaddudm   (tmp2, Ch,   kpw);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   761
  vaddudm   (tmp1, h,    bse);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   762
  vsel      (Maj,  b,    c,   Maj);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   763
  vaddudm   (tmp1, tmp1, tmp2);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   764
  vshasigmad(bsa,  a,    1,   0);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   765
  vaddudm   (tmp2, bsa,  Maj);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   766
  vaddudm   (d,    d,    tmp1);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   767
  vaddudm   (h,    tmp1, tmp2);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   768
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   769
  // advance vector pointer to the next iteration
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   770
  h_cnt++;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   771
}
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   772
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   773
void MacroAssembler::sha512_calc_2w(const VectorRegister w0,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   774
                                    const VectorRegister w1,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   775
                                    const VectorRegister w2,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   776
                                    const VectorRegister w3,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   777
                                    const VectorRegister w4,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   778
                                    const VectorRegister w5,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   779
                                    const VectorRegister w6,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   780
                                    const VectorRegister w7,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   781
                                    const VectorRegister kpw0,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   782
                                    const VectorRegister kpw1,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   783
                                    const Register j,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   784
                                    const VectorRegister vRb,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   785
                                    const Register k) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   786
  // Temporaries
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   787
  const VectorRegister VR_a = VR20;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   788
  const VectorRegister VR_b = VR21;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   789
  const VectorRegister VR_c = VR22;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   790
  const VectorRegister VR_d = VR23;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   791
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   792
  // load to k[j]
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   793
  lvx        (VR_a, j,    k);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   794
  // advance j
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   795
  addi       (j,    j,    16); // 16 bytes were read
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   796
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   797
#if defined(VM_LITTLE_ENDIAN)
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   798
  // v6 = w[j-15], w[j-14]
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   799
  vperm      (VR_b, w1,   w0,  vRb);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   800
  // v12 = w[j-7], w[j-6]
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   801
  vperm      (VR_c, w5,   w4,  vRb);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   802
#else
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   803
  // v6 = w[j-15], w[j-14]
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   804
  vperm      (VR_b, w0,   w1,  vRb);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   805
  // v12 = w[j-7], w[j-6]
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   806
  vperm      (VR_c, w4,   w5,  vRb);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   807
#endif
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   808
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   809
  // v6 = s0(w[j-15]) , s0(w[j-14])
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   810
  vshasigmad (VR_b, VR_b,    0,   0);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   811
  // v5 = s1(w[j-2]) , s1(w[j-1])
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   812
  vshasigmad (VR_d, w7,      0,   0xf);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   813
  // v6 = s0(w[j-15]) + w[j-7] , s0(w[j-14]) + w[j-6]
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   814
  vaddudm    (VR_b, VR_b, VR_c);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   815
  // v8 = s1(w[j-2]) + w[j-16] , s1(w[j-1]) + w[j-15]
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   816
  vaddudm    (VR_d, VR_d, w0);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   817
  // v9 = s0(w[j-15]) + w[j-7] + w[j-16] + s1(w[j-2]), // w[j]
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   818
  //      s0(w[j-14]) + w[j-6] + w[j-15] + s1(w[j-1]), // w[j+1]
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   819
  vaddudm    (VR_c, VR_d, VR_b);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   820
  // Updating w0 to w7 to hold the new previous 16 values from w.
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   821
  vmr        (w0,   w1);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   822
  vmr        (w1,   w2);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   823
  vmr        (w2,   w3);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   824
  vmr        (w3,   w4);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   825
  vmr        (w4,   w5);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   826
  vmr        (w5,   w6);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   827
  vmr        (w6,   w7);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   828
  vmr        (w7,   VR_c);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   829
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   830
#if defined(VM_LITTLE_ENDIAN)
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   831
  // store k + w to kpw0 (2 values at once)
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   832
  vaddudm    (kpw0, VR_c, VR_a);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   833
  // kpw1 holds (k + w)[1]
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   834
  vsldoi     (kpw1, kpw0, kpw0, 8);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   835
#else
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   836
  // store k + w to kpw0 (2 values at once)
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   837
  vaddudm    (kpw1, VR_c, VR_a);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   838
  // kpw1 holds (k + w)[1]
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   839
  vsldoi     (kpw0, kpw1, kpw1, 8);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   840
#endif
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   841
}
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   842
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   843
void MacroAssembler::sha512_load_h_vec(const Register state,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   844
                                       const VectorRegister* hs,
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   845
                                       const int total_hs) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   846
#if defined(VM_LITTLE_ENDIAN)
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   847
  VectorRegister a   = hs[0];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   848
  VectorRegister g   = hs[6];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   849
  int start_idx = 0;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   850
#else
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   851
  VectorRegister a   = hs[1];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   852
  VectorRegister g   = hs[7];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   853
  int start_idx = 1;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   854
#endif
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   855
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   856
  Register addr      = R7;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   857
  VectorRegister vRb = VR8;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   858
  Register tmp       = R8;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   859
  Label state_aligned, after_state_aligned;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   860
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   861
  andi_(tmp, state, 0xf);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   862
  beq(CCR0, state_aligned);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   863
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   864
  // deal with unaligned addresses
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   865
  VectorRegister aux = VR9;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   866
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   867
  lvx(hs[start_idx], state);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   868
  load_perm(vRb, state);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   869
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   870
  for (int n = start_idx + 2; n < total_hs; n += 2) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   871
    VectorRegister h_cur   = hs[n];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   872
    VectorRegister h_prev2 = hs[n - 2];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   873
    addi(addr, state, (n/2) * 16);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   874
    lvx(h_cur, addr);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   875
    vec_perm(h_prev2, h_cur, vRb);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   876
  }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   877
  addi(addr, state, (total_hs/2) * 16);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   878
  lvx    (aux, addr);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   879
  vec_perm(hs[total_hs - 2 + start_idx], aux, vRb);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   880
  b      (after_state_aligned);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   881
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   882
  bind(state_aligned);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   883
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   884
  // deal with aligned addresses
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   885
  lvx(hs[start_idx], state);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   886
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   887
  for (int n = start_idx + 2; n < total_hs; n += 2) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   888
    VectorRegister h_cur = hs[n];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   889
    addi(addr, state, (n/2) * 16);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   890
    lvx(h_cur, addr);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   891
  }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   892
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   893
  bind(after_state_aligned);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   894
}
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   895
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   896
// SHA-512 round constants: 80 64-bit words, four per row below.
// The table is requested with 16-byte alignment because the generated code
// reads it two words at a time with aligned vector loads.
static const uint64_t sha512_round_table[80] __attribute((aligned(16))) = {
  /*  0 */ 0x428a2f98d728ae22, 0x7137449123ef65cd, 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc,
  /*  4 */ 0x3956c25bf348b538, 0x59f111f1b605d019, 0x923f82a4af194f9b, 0xab1c5ed5da6d8118,
  /*  8 */ 0xd807aa98a3030242, 0x12835b0145706fbe, 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2,
  /* 12 */ 0x72be5d74f27b896f, 0x80deb1fe3b1696b1, 0x9bdc06a725c71235, 0xc19bf174cf692694,
  /* 16 */ 0xe49b69c19ef14ad2, 0xefbe4786384f25e3, 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65,
  /* 20 */ 0x2de92c6f592b0275, 0x4a7484aa6ea6e483, 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5,
  /* 24 */ 0x983e5152ee66dfab, 0xa831c66d2db43210, 0xb00327c898fb213f, 0xbf597fc7beef0ee4,
  /* 28 */ 0xc6e00bf33da88fc2, 0xd5a79147930aa725, 0x06ca6351e003826f, 0x142929670a0e6e70,
  /* 32 */ 0x27b70a8546d22ffc, 0x2e1b21385c26c926, 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df,
  /* 36 */ 0x650a73548baf63de, 0x766a0abb3c77b2a8, 0x81c2c92e47edaee6, 0x92722c851482353b,
  /* 40 */ 0xa2bfe8a14cf10364, 0xa81a664bbc423001, 0xc24b8b70d0f89791, 0xc76c51a30654be30,
  /* 44 */ 0xd192e819d6ef5218, 0xd69906245565a910, 0xf40e35855771202a, 0x106aa07032bbd1b8,
  /* 48 */ 0x19a4c116b8d2d0c8, 0x1e376c085141ab53, 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8,
  /* 52 */ 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb, 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3,
  /* 56 */ 0x748f82ee5defb2fc, 0x78a5636f43172f60, 0x84c87814a1f0ab72, 0x8cc702081a6439ec,
  /* 60 */ 0x90befffa23631e28, 0xa4506cebde82bde9, 0xbef9a3f7b2c67915, 0xc67178f2e372532b,
  /* 64 */ 0xca273eceea26619c, 0xd186b8c721c0c207, 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178,
  /* 68 */ 0x06f067aa72176fba, 0x0a637dc5a2c898a6, 0x113f9804bef90dae, 0x1b710b35131c471b,
  /* 72 */ 0x28db77f523047d84, 0x32caab7b40c72493, 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c,
  /* 76 */ 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a, 0x5fcb6fab3ad6faec, 0x6c44198c4a475817,
};

// Pointer through which the generated code locates the table. It is not
// const itself so that it can be redirected to an aligned copy when the
// static table turns out not to be 16-byte aligned.
static const uint64_t *sha512_round_consts = sha512_round_table;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   939
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   940
//   R3_ARG1   - byte[]  Input string with padding but in Big Endian
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   941
//   R4_ARG2   - long[]  SHA.state (at first, the root of primes)
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   942
//   R5_ARG3   - int     offset
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   943
//   R6_ARG4   - int     limit
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   944
//
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   945
//   Internal Register usage:
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   946
//   R7 R8 R9  - volatile temporaries
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   947
//   VR0-VR7   - a-h
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   948
//   VR8       - vRb
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   949
//   VR9       - aux (highly volatile, use with care)
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   950
//   VR10-VR17 - w0-w7 | ini_a-ini_h
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   951
//   VR18      - vsp16 | kplusw0
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   952
//   VR19      - vsp32 | kplusw1
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   953
//   VR20-VR25 - sha512_calc_2w and sha512_round temporaries
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   954
void MacroAssembler::sha512(bool multi_block) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   955
  static const ssize_t buf_size = 128;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   956
  static const uint8_t w_size = sizeof(sha512_round_table)/sizeof(uint64_t);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   957
#ifdef AIX
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   958
  // malloc provides 16 byte alignment
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   959
  if (((uintptr_t)sha512_round_consts & 0xF) != 0) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   960
    uint64_t *new_round_consts = (uint64_t*)malloc(sizeof(sha512_round_table));
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   961
    guarantee(new_round_consts, "oom");
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   962
    memcpy(new_round_consts, sha512_round_consts, sizeof(sha512_round_table));
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   963
    sha512_round_consts = (const uint64_t*)new_round_consts;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   964
  }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   965
#endif
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   966
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   967
  Register buf_in = R3_ARG1;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   968
  Register state  = R4_ARG2;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   969
  Register ofs    = R5_ARG3;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   970
  Register limit  = R6_ARG4;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   971
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   972
  Label sha_loop, core_loop;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   973
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   974
  // Save non-volatile vector registers in the red zone
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   975
  static const VectorRegister nv[] = {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   976
    VR20, VR21, VR22, VR23, VR24, VR25/*, VR26, VR27, VR28, VR29, VR30, VR31*/
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   977
  };
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   978
  static const uint8_t nv_size = sizeof(nv) / sizeof (VectorRegister);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   979
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   980
  for (int c = 0; c < nv_size; c++) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   981
    Register idx = R7;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   982
    li  (idx, (c - (nv_size)) * 16);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   983
    stvx(nv[c], idx, R1);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   984
  }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   985
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   986
  // Load hash state to registers
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   987
  VectorRegister a = VR0;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   988
  VectorRegister b = VR1;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   989
  VectorRegister c = VR2;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   990
  VectorRegister d = VR3;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   991
  VectorRegister e = VR4;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   992
  VectorRegister f = VR5;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   993
  VectorRegister g = VR6;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   994
  VectorRegister h = VR7;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   995
  static const VectorRegister hs[] = {a, b, c, d, e, f, g, h};
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   996
  static const int total_hs = sizeof(hs)/sizeof(VectorRegister);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   997
  // counter for cycling through hs vector to avoid register moves between iterations
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   998
  int h_cnt = 0;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
   999
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1000
  // Load a-h registers from the memory pointed by state
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1001
  sha512_load_h_vec(state, hs, total_hs);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1002
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1003
  Register k = R9;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1004
  assert(((uintptr_t)sha512_round_consts & 0xF) == 0, "k alignment");
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1005
  load_const_optimized(k, (address)sha512_round_consts, R0);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1006
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1007
  if (multi_block) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1008
    align(OptoLoopAlignment);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1009
  }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1010
  bind(sha_loop);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1011
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1012
  for (int n = 0; n < total_hs; n += 2) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1013
#if defined(VM_LITTLE_ENDIAN)
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1014
    VectorRegister h_cur = hs[n];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1015
    VectorRegister h_next = hs[n + 1];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1016
#else
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1017
    VectorRegister h_cur = hs[n + 1];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1018
    VectorRegister h_next = hs[n];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1019
#endif
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1020
    vsldoi (h_next, h_cur, h_cur, 8);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1021
  }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1022
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1023
  // Load 16 elements from w out of the loop.
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1024
  // Order of the long values is Endianness specific.
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1025
  VectorRegister w0 = VR10;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1026
  VectorRegister w1 = VR11;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1027
  VectorRegister w2 = VR12;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1028
  VectorRegister w3 = VR13;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1029
  VectorRegister w4 = VR14;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1030
  VectorRegister w5 = VR15;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1031
  VectorRegister w6 = VR16;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1032
  VectorRegister w7 = VR17;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1033
  static const VectorRegister ws[] = {w0, w1, w2, w3, w4, w5, w6, w7};
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1034
  static const int total_ws = sizeof(ws)/sizeof(VectorRegister);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1035
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1036
  // Load 16 w into vectors and setup vsl for vperm
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1037
  sha512_load_w_vec(buf_in, ws, total_ws);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1038
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1039
#if defined(VM_LITTLE_ENDIAN)
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1040
  VectorRegister vsp16 = VR18;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1041
  VectorRegister vsp32 = VR19;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1042
  VectorRegister shiftarg = VR9;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1043
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1044
  vspltisw(vsp16,    8);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1045
  vspltisw(shiftarg, 1);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1046
  vsl     (vsp16,    vsp16, shiftarg);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1047
  vsl     (vsp32,    vsp16, shiftarg);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1048
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1049
  VectorRegister vsp8 = VR9;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1050
  vspltish(vsp8,     8);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1051
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1052
  // Convert input from Big Endian to Little Endian
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1053
  for (int c = 0; c < total_ws; c++) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1054
    VectorRegister w = ws[c];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1055
    vrlh  (w, w, vsp8);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1056
  }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1057
  for (int c = 0; c < total_ws; c++) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1058
    VectorRegister w = ws[c];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1059
    vrlw  (w, w, vsp16);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1060
  }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1061
  for (int c = 0; c < total_ws; c++) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1062
    VectorRegister w = ws[c];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1063
    vrld  (w, w, vsp32);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1064
  }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1065
#endif
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1066
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1067
  Register Rb        = R10;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1068
  VectorRegister vRb = VR8;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1069
  li      (Rb, 8);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1070
  load_perm(vRb, Rb);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1071
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1072
  VectorRegister kplusw0 = VR18;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1073
  VectorRegister kplusw1 = VR19;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1074
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1075
  Register addr      = R7;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1076
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1077
  for (int n = 0; n < total_ws; n++) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1078
    VectorRegister w = ws[n];
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1079
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1080
    if (n == 0) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1081
      lvx  (kplusw0, k);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1082
    } else {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1083
      addi (addr, k, n * 16);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1084
      lvx  (kplusw0, addr);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1085
    }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1086
#if defined(VM_LITTLE_ENDIAN)
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1087
    vaddudm(kplusw0, kplusw0, w);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1088
    vsldoi (kplusw1, kplusw0, kplusw0, 8);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1089
#else
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1090
    vaddudm(kplusw1, kplusw0, w);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1091
    vsldoi (kplusw0, kplusw1, kplusw1, 8);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1092
#endif
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1093
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1094
    sha512_round(hs, total_hs, h_cnt, kplusw0);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1095
    sha512_round(hs, total_hs, h_cnt, kplusw1);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1096
  }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1097
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1098
  Register tmp       = R8;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1099
  li    (tmp, (w_size-16)/total_hs);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1100
  mtctr (tmp);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1101
  // j will be aligned to 4 for loading words.
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1102
  // Whenever read, advance the pointer (e.g: when j is used in a function)
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1103
  Register j = tmp;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1104
  li     (j, 8*16);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1105
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1106
  align(OptoLoopAlignment);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1107
  bind(core_loop);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1108
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1109
  // due to VectorRegister rotate, always iterate in multiples of total_hs
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1110
  for (int n = 0; n < total_hs/2; n++) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1111
    sha512_calc_2w(w0, w1, w2, w3, w4, w5, w6, w7, kplusw0, kplusw1, j, vRb, k);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1112
    sha512_round(hs, total_hs, h_cnt, kplusw0);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1113
    sha512_round(hs, total_hs, h_cnt, kplusw1);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1114
  }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1115
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1116
  bdnz   (core_loop);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1117
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1118
  sha512_update_sha_state(state, hs, total_hs);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1119
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1120
  if (multi_block) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1121
    addi(buf_in, buf_in, buf_size);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1122
    addi(ofs, ofs, buf_size);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1123
    cmplw(CCR0, ofs, limit);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1124
    ble(CCR0, sha_loop);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1125
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1126
    // return ofs
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1127
    mr(R3_RET, ofs);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1128
  }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1129
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1130
  // Restore non-volatile registers
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1131
  for (int c = 0; c < nv_size; c++) {
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1132
    Register idx = R7;
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1133
    li  (idx, (c - (nv_size)) * 16);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1134
    lvx(nv[c], idx, R1);
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1135
  }
f4962ab855b6 8185979: PPC64: Implement SHA2 intrinsic
mdoerr
parents:
diff changeset
  1136
}