src/hotspot/cpu/x86/macroAssembler_x86_aes.cpp
author shade
Thu, 13 Dec 2018 16:14:07 +0100
changeset 53017 e10a1f7aaa13
parent 52990 1ed8de9045a7
child 57786 948ac3112da8
permissions -rw-r--r--
8215354: x86_32 build failures after JDK-8214074 (Ghash optimization using AVX instructions) Reviewed-by: thartmann
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
52990
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
     1
/*
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
     2
* Copyright (c) 2018, Intel Corporation.
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
     3
*
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
     4
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
     5
*
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
     6
* This code is free software; you can redistribute it and/or modify it
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
     7
* under the terms of the GNU General Public License version 2 only, as
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
     8
* published by the Free Software Foundation.
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
     9
*
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    10
* This code is distributed in the hope that it will be useful, but WITHOUT
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    11
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    12
* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    13
* version 2 for more details (a copy is included in the LICENSE file that
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    14
* accompanied this code).
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    15
*
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    16
* You should have received a copy of the GNU General Public License version
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    17
* 2 along with this work; if not, write to the Free Software Foundation,
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    18
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    19
*
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    20
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    21
* or visit www.oracle.com if you need additional information or have any
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    22
* questions.
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    23
*
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    24
*/
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    25
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    26
#include "precompiled.hpp"
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    27
#include "asm/assembler.hpp"
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    28
#include "asm/assembler.inline.hpp"
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    29
#include "runtime/stubRoutines.hpp"
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    30
#include "macroAssembler_x86.hpp"
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    31
53017
e10a1f7aaa13 8215354: x86_32 build failures after JDK-8214074 (Ghash optimization using AVX instructions)
shade
parents: 52990
diff changeset
    32
#ifdef _LP64
52990
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    33
// Multiply 128 x 128 bits, using 4 pclmulqdq operations
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    34
void MacroAssembler::schoolbookAAD(int i, Register htbl, XMMRegister data,
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    35
    XMMRegister tmp0, XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3) {
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    36
    movdqu(xmm15, Address(htbl, i * 16));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    37
    vpclmulhqlqdq(tmp3, data, xmm15); // 0x01
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    38
    vpxor(tmp2, tmp2, tmp3, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    39
    vpclmulldq(tmp3, data, xmm15); // 0x00
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    40
    vpxor(tmp0, tmp0, tmp3, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    41
    vpclmulhdq(tmp3, data, xmm15); // 0x11
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    42
    vpxor(tmp1, tmp1, tmp3, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    43
    vpclmullqhqdq(tmp3, data, xmm15); // 0x10
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    44
    vpxor(tmp2, tmp2, tmp3, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    45
}
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    46
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    47
// Multiply two 128 bit numbers resulting in a 256 bit value
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    48
// Result of the multiplication followed by reduction stored in state
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    49
void MacroAssembler::gfmul(XMMRegister tmp0, XMMRegister state) {
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    50
    const XMMRegister tmp1 = xmm4;
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    51
    const XMMRegister tmp2 = xmm5;
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    52
    const XMMRegister tmp3 = xmm6;
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    53
    const XMMRegister tmp4 = xmm7;
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    54
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    55
    vpclmulldq(tmp1, state, tmp0); //0x00  (a0 * b0)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    56
    vpclmulhdq(tmp4, state, tmp0);//0x11 (a1 * b1)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    57
    vpclmullqhqdq(tmp2, state, tmp0);//0x10 (a1 * b0)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    58
    vpclmulhqlqdq(tmp3, state, tmp0); //0x01 (a0 * b1)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    59
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    60
    vpxor(tmp2, tmp2, tmp3, Assembler::AVX_128bit); // (a0 * b1) + (a1 * b0)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    61
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    62
    vpslldq(tmp3, tmp2, 8, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    63
    vpsrldq(tmp2, tmp2, 8, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    64
    vpxor(tmp1, tmp1, tmp3, Assembler::AVX_128bit); // tmp1 and tmp4 hold the result
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    65
    vpxor(tmp4, tmp4, tmp2, Assembler::AVX_128bit); // of carryless multiplication
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    66
    // Follows the reduction technique mentioned in
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    67
    // Shift-XOR reduction described in Gueron-Kounavis May 2010
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    68
    // First phase of reduction
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    69
    //
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    70
    vpslld(xmm8, tmp1, 31, Assembler::AVX_128bit); // packed right shift shifting << 31
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    71
    vpslld(xmm9, tmp1, 30, Assembler::AVX_128bit); // packed right shift shifting << 30
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    72
    vpslld(xmm10, tmp1, 25, Assembler::AVX_128bit);// packed right shift shifting << 25
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    73
    // xor the shifted versions
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    74
    vpxor(xmm8, xmm8, xmm9, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    75
    vpxor(xmm8, xmm8, xmm10, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    76
    vpslldq(xmm9, xmm8, 12, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    77
    vpsrldq(xmm8, xmm8, 4, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    78
    vpxor(tmp1, tmp1, xmm9, Assembler::AVX_128bit);// first phase of the reduction complete
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    79
    //
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    80
    // Second phase of the reduction
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    81
    //
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    82
    vpsrld(xmm9, tmp1, 1, Assembler::AVX_128bit);// packed left shifting >> 1
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    83
    vpsrld(xmm10, tmp1, 2, Assembler::AVX_128bit);// packed left shifting >> 2
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    84
    vpsrld(xmm11, tmp1, 7, Assembler::AVX_128bit);// packed left shifting >> 7
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    85
    vpxor(xmm9, xmm9, xmm10, Assembler::AVX_128bit);// xor the shifted versions
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    86
    vpxor(xmm9, xmm9, xmm11, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    87
    vpxor(xmm9, xmm9, xmm8, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    88
    vpxor(tmp1, tmp1, xmm9, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    89
    vpxor(state, tmp4, tmp1, Assembler::AVX_128bit);// the result is in state
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    90
    ret(0);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    91
}
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    92
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    93
// This method takes the subkey after expansion as input and generates 1 * 16 power of subkey H.
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    94
// The power of H is used in reduction process for one block ghash
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    95
void MacroAssembler::generateHtbl_one_block(Register htbl) {
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    96
    const XMMRegister t = xmm13;
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    97
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    98
    // load the original subkey hash
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
    99
    movdqu(t, Address(htbl, 0));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   100
    // shuffle using long swap mask
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   101
    movdqu(xmm10, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   102
    vpshufb(t, t, xmm10, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   103
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   104
    // Compute H' = GFMUL(H, 2)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   105
    vpsrld(xmm3, t, 7, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   106
    movdqu(xmm4, ExternalAddress(StubRoutines::x86::ghash_shufflemask_addr()));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   107
    vpshufb(xmm3, xmm3, xmm4, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   108
    movl(rax, 0xff00);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   109
    movdl(xmm4, rax);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   110
    vpshufb(xmm4, xmm4, xmm3, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   111
    movdqu(xmm5, ExternalAddress(StubRoutines::x86::ghash_polynomial_addr()));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   112
    vpand(xmm5, xmm5, xmm4, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   113
    vpsrld(xmm3, t, 31, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   114
    vpslld(xmm4, t, 1, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   115
    vpslldq(xmm3, xmm3, 4, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   116
    vpxor(t, xmm4, xmm3, Assembler::AVX_128bit);// t holds p(x) <<1 or H * 2
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   117
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   118
    //Adding p(x)<<1 to xmm5 which holds the reduction polynomial
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   119
    vpxor(t, t, xmm5, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   120
    movdqu(Address(htbl, 1 * 16), t); // H * 2
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   121
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   122
    ret(0);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   123
}
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   124
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   125
// This method takes the subkey after expansion as input and generates the remaining powers of subkey H.
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   126
// The power of H is used in reduction process for eight block ghash
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   127
void MacroAssembler::generateHtbl_eight_blocks(Register htbl) {
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   128
    const XMMRegister t = xmm13;
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   129
    const XMMRegister tmp0 = xmm1;
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   130
    Label GFMUL;
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   131
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   132
    movdqu(t, Address(htbl, 1 * 16));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   133
    movdqu(tmp0, t);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   134
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   135
    // tmp0 and t hold H. Now we compute powers of H by using GFMUL(H, H)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   136
    call(GFMUL, relocInfo::none);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   137
    movdqu(Address(htbl, 2 * 16), t); //H ^ 2 * 2
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   138
    call(GFMUL, relocInfo::none);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   139
    movdqu(Address(htbl, 3 * 16), t); //H ^ 3 * 2
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   140
    call(GFMUL, relocInfo::none);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   141
    movdqu(Address(htbl, 4 * 16), t); //H ^ 4 * 2
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   142
    call(GFMUL, relocInfo::none);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   143
    movdqu(Address(htbl, 5 * 16), t); //H ^ 5 * 2
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   144
    call(GFMUL, relocInfo::none);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   145
    movdqu(Address(htbl, 6 * 16), t); //H ^ 6 * 2
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   146
    call(GFMUL, relocInfo::none);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   147
    movdqu(Address(htbl, 7 * 16), t); //H ^ 7 * 2
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   148
    call(GFMUL, relocInfo::none);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   149
    movdqu(Address(htbl, 8 * 16), t); //H ^ 8 * 2
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   150
    ret(0);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   151
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   152
    bind(GFMUL);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   153
    gfmul(tmp0, t);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   154
}
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   155
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   156
// Multiblock and single block GHASH computation using Shift XOR reduction technique
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   157
void MacroAssembler::avx_ghash(Register input_state, Register htbl,
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   158
    Register input_data, Register blocks) {
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   159
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   160
    // temporary variables to hold input data and input state
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   161
    const XMMRegister data = xmm1;
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   162
    const XMMRegister state = xmm0;
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   163
    // temporary variables to hold intermediate results
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   164
    const XMMRegister tmp0 = xmm3;
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   165
    const XMMRegister tmp1 = xmm4;
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   166
    const XMMRegister tmp2 = xmm5;
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   167
    const XMMRegister tmp3 = xmm6;
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   168
    // temporary variables to hold byte and long swap masks
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   169
    const XMMRegister bswap_mask = xmm2;
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   170
    const XMMRegister lswap_mask = xmm14;
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   171
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   172
    Label GENERATE_HTBL_1_BLK, GENERATE_HTBL_8_BLKS, BEGIN_PROCESS, GFMUL, BLOCK8_REDUCTION,
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   173
          ONE_BLK_INIT, PROCESS_1_BLOCK, PROCESS_8_BLOCKS, SAVE_STATE, EXIT_GHASH;
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   174
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   175
    testptr(blocks, blocks);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   176
    jcc(Assembler::zero, EXIT_GHASH);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   177
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   178
    // Check if Hashtable (1*16) has been already generated
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   179
    // For anything less than 8 blocks, we generate only the first power of H.
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   180
    movdqu(tmp2, Address(htbl, 1 * 16));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   181
    ptest(tmp2, tmp2);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   182
    jcc(Assembler::notZero, BEGIN_PROCESS);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   183
    call(GENERATE_HTBL_1_BLK, relocInfo::none);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   184
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   185
    // Shuffle the input state
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   186
    bind(BEGIN_PROCESS);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   187
    movdqu(lswap_mask, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   188
    movdqu(state, Address(input_state, 0));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   189
    vpshufb(state, state, lswap_mask, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   190
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   191
    cmpl(blocks, 8);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   192
    jcc(Assembler::below, ONE_BLK_INIT);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   193
    // If we have 8 blocks or more data, then generate remaining powers of H
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   194
    movdqu(tmp2, Address(htbl, 8 * 16));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   195
    ptest(tmp2, tmp2);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   196
    jcc(Assembler::notZero, PROCESS_8_BLOCKS);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   197
    call(GENERATE_HTBL_8_BLKS, relocInfo::none);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   198
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   199
    //Do 8 multiplies followed by a reduction processing 8 blocks of data at a time
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   200
    //Each block = 16 bytes.
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   201
    bind(PROCESS_8_BLOCKS);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   202
    subl(blocks, 8);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   203
    movdqu(bswap_mask, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   204
    movdqu(data, Address(input_data, 16 * 7));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   205
    vpshufb(data, data, bswap_mask, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   206
    //Loading 1*16 as calculated powers of H required starts at that location.
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   207
    movdqu(xmm15, Address(htbl, 1 * 16));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   208
    //Perform carryless multiplication of (H*2, data block #7)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   209
    vpclmulhqlqdq(tmp2, data, xmm15);//a0 * b1
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   210
    vpclmulldq(tmp0, data, xmm15);//a0 * b0
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   211
    vpclmulhdq(tmp1, data, xmm15);//a1 * b1
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   212
    vpclmullqhqdq(tmp3, data, xmm15);//a1* b0
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   213
    vpxor(tmp2, tmp2, tmp3, Assembler::AVX_128bit);// (a0 * b1) + (a1 * b0)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   214
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   215
    movdqu(data, Address(input_data, 16 * 6));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   216
    vpshufb(data, data, bswap_mask, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   217
    // Perform carryless multiplication of (H^2 * 2, data block #6)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   218
    schoolbookAAD(2, htbl, data, tmp0, tmp1, tmp2, tmp3);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   219
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   220
    movdqu(data, Address(input_data, 16 * 5));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   221
    vpshufb(data, data, bswap_mask, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   222
    // Perform carryless multiplication of (H^3 * 2, data block #5)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   223
    schoolbookAAD(3, htbl, data, tmp0, tmp1, tmp2, tmp3);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   224
    movdqu(data, Address(input_data, 16 * 4));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   225
    vpshufb(data, data, bswap_mask, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   226
    // Perform carryless multiplication of (H^4 * 2, data block #4)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   227
    schoolbookAAD(4, htbl, data, tmp0, tmp1, tmp2, tmp3);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   228
    movdqu(data, Address(input_data, 16 * 3));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   229
    vpshufb(data, data, bswap_mask, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   230
    // Perform carryless multiplication of (H^5 * 2, data block #3)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   231
    schoolbookAAD(5, htbl, data, tmp0, tmp1, tmp2, tmp3);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   232
    movdqu(data, Address(input_data, 16 * 2));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   233
    vpshufb(data, data, bswap_mask, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   234
    // Perform carryless multiplication of (H^6 * 2, data block #2)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   235
    schoolbookAAD(6, htbl, data, tmp0, tmp1, tmp2, tmp3);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   236
    movdqu(data, Address(input_data, 16 * 1));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   237
    vpshufb(data, data, bswap_mask, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   238
    // Perform carryless multiplication of (H^7 * 2, data block #1)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   239
    schoolbookAAD(7, htbl, data, tmp0, tmp1, tmp2, tmp3);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   240
    movdqu(data, Address(input_data, 16 * 0));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   241
    // XOR data block #0 with the input state before performing the carry-less multiplication
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   242
    vpshufb(data, data, bswap_mask, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   243
    vpxor(data, data, state, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   244
    // Perform carryless multiplication of (H^8 * 2, data block #0)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   245
    schoolbookAAD(8, htbl, data, tmp0, tmp1, tmp2, tmp3);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   246
    vpslldq(tmp3, tmp2, 8, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   247
    vpsrldq(tmp2, tmp2, 8, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   248
    vpxor(tmp0, tmp0, tmp3, Assembler::AVX_128bit);// tmp0, tmp1 contains aggregated results of
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   249
    vpxor(tmp1, tmp1, tmp2, Assembler::AVX_128bit);// the multiplication operation
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   250
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   251
    // we have the 2 128-bit partially accumulated multiplication results in tmp0:tmp1
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   252
    // with the higher 128 bits in tmp1 and the lower 128 bits in tmp0
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   253
    // The code below applies the
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   254
    // Shift-XOR reduction described in Gueron-Kounavis May 2010
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   255
    bind(BLOCK8_REDUCTION);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   256
    // First Phase of the reduction
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   257
    vpslld(xmm8, tmp0, 31, Assembler::AVX_128bit); // packed right shifting << 31
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   258
    vpslld(xmm9, tmp0, 30, Assembler::AVX_128bit); // packed right shifting << 30
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   259
    vpslld(xmm10, tmp0, 25, Assembler::AVX_128bit); // packed right shifting << 25
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   260
    // xor the shifted versions
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   261
    vpxor(xmm8, xmm8, xmm10, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   262
    vpxor(xmm8, xmm8, xmm9, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   263
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   264
    vpslldq(xmm9, xmm8, 12, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   265
    vpsrldq(xmm8, xmm8, 4, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   266
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   267
    vpxor(tmp0, tmp0, xmm9, Assembler::AVX_128bit); // first phase of reduction is complete
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   268
    // second phase of the reduction
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   269
    vpsrld(xmm9, tmp0, 1, Assembler::AVX_128bit); // packed left shifting >> 1
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   270
    vpsrld(xmm10, tmp0, 2, Assembler::AVX_128bit); // packed left shifting >> 2
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   271
    vpsrld(tmp2, tmp0, 7, Assembler::AVX_128bit); // packed left shifting >> 7
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   272
    // xor the shifted versions
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   273
    vpxor(xmm9, xmm9, xmm10, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   274
    vpxor(xmm9, xmm9, tmp2, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   275
    vpxor(xmm9, xmm9, xmm8, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   276
    vpxor(tmp0, xmm9, tmp0, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   277
    // Final result is in state
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   278
    vpxor(state, tmp0, tmp1, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   279
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   280
    lea(input_data, Address(input_data, 16 * 8));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   281
    cmpl(blocks, 8);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   282
    jcc(Assembler::below, ONE_BLK_INIT);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   283
    jmp(PROCESS_8_BLOCKS);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   284
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   285
    // Since this is a one-block operation, we only use H * 2, i.e. the first power of H
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   286
    bind(ONE_BLK_INIT);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   287
    movdqu(tmp0, Address(htbl, 1 * 16));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   288
    movdqu(bswap_mask, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   289
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   290
    //Do one (128 bit x 128 bit) carry-less multiplication at a time followed by a reduction.
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   291
    bind(PROCESS_1_BLOCK);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   292
    cmpl(blocks, 0);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   293
    jcc(Assembler::equal, SAVE_STATE);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   294
    subl(blocks, 1);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   295
    movdqu(data, Address(input_data, 0));
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   296
    vpshufb(data, data, bswap_mask, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   297
    vpxor(state, state, data, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   298
    // gfmul(H*2, state)
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   299
    call(GFMUL, relocInfo::none);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   300
    addptr(input_data, 16);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   301
    jmp(PROCESS_1_BLOCK);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   302
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   303
    bind(SAVE_STATE);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   304
    vpshufb(state, state, lswap_mask, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   305
    movdqu(Address(input_state, 0), state);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   306
    jmp(EXIT_GHASH);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   307
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   308
    bind(GFMUL);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   309
    gfmul(tmp0, state);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   310
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   311
    bind(GENERATE_HTBL_1_BLK);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   312
    generateHtbl_one_block(htbl);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   313
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   314
    bind(GENERATE_HTBL_8_BLKS);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   315
    generateHtbl_eight_blocks(htbl);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   316
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   317
    bind(EXIT_GHASH);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   318
    // zero out xmm registers used for Htbl storage
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   319
    vpxor(xmm0, xmm0, xmm0, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   320
    vpxor(xmm1, xmm1, xmm1, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   321
    vpxor(xmm3, xmm3, xmm3, Assembler::AVX_128bit);
1ed8de9045a7 8214074: Ghash optimization using AVX instructions
ascarpino
parents:
diff changeset
   322
    vpxor(xmm15, xmm15, xmm15, Assembler::AVX_128bit);
53017
e10a1f7aaa13 8215354: x86_32 build failures after JDK-8214074 (Ghash optimization using AVX instructions)
shade
parents: 52990
diff changeset
   323
}
e10a1f7aaa13 8215354: x86_32 build failures after JDK-8214074 (Ghash optimization using AVX instructions)
shade
parents: 52990
diff changeset
   324
#endif // _LP64