8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
author ascarpino
Wed, 17 Jun 2015 17:41:04 -0700
changeset 31462 1d0b519af651
parent 31254 210db13e7ead
child 31463 c599e805e982
8073108: Use x86 and SPARC CPU instructions for GHASH acceleration Reviewed-by: kvn, jrose
jdk/src/java.base/share/classes/com/sun/crypto/provider/GHASH.java
jdk/test/com/sun/crypto/provider/Cipher/AES/TestGHASH.java
--- a/jdk/src/java.base/share/classes/com/sun/crypto/provider/GHASH.java	Fri Jun 12 09:59:30 2015 -0700
+++ b/jdk/src/java.base/share/classes/com/sun/crypto/provider/GHASH.java	Wed Jun 17 17:41:04 2015 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2015 Red Hat, Inc.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -62,14 +62,16 @@
 
     private static final int AES_BLOCK_SIZE = 16;
 
-    // Multiplies state0, state1 by V0, V1.
-    private void blockMult(long V0, long V1) {
+    // Multiplies state[0], state[1] by subkeyH[0], subkeyH[1].
+    private static void blockMult(long[] st, long[] subH) {
         long Z0 = 0;
         long Z1 = 0;
+        long V0 = subH[0];
+        long V1 = subH[1];
         long X;
 
-        // Separate loops for processing state0 and state1.
-        X = state0;
+        // Separate loops for processing state[0] and state[1].
+        X = st[0];
         for (int i = 0; i < 64; i++) {
             // Zi+1 = Zi if bit i of x is 0
             long mask = X >> 63;
@@ -89,7 +91,7 @@
             X <<= 1;
         }
 
-        X = state1;
+        X = st[1];
         for (int i = 64; i < 127; i++) {
             // Zi+1 = Zi if bit i of x is 0
             long mask = X >> 63;
@@ -115,15 +117,18 @@
         Z1 ^= V1 & mask;
 
         // Save result.
-        state0 = Z0;
-        state1 = Z1;
+        st[0] = Z0;
+        st[1] = Z1;
+
     }
 
+    /* subkeyH and state are stored in long[] for GHASH intrinsic use */
+
     // hash subkey H; should not change after the object has been constructed
-    private final long subkeyH0, subkeyH1;
+    private final long[] subkeyH;
 
     // buffer for storing hash
-    private long state0, state1;
+    private final long[] state;
 
     // variables for save/restore calls
     private long stateSave0, stateSave1;
@@ -141,8 +146,10 @@
         if ((subkeyH == null) || subkeyH.length != AES_BLOCK_SIZE) {
             throw new ProviderException("Internal error");
         }
-        this.subkeyH0 = getLong(subkeyH, 0);
-        this.subkeyH1 = getLong(subkeyH, 8);
+        state = new long[2];
+        this.subkeyH = new long[2];
+        this.subkeyH[0] = getLong(subkeyH, 0);
+        this.subkeyH[1] = getLong(subkeyH, 8);
     }
 
     /**
@@ -151,33 +158,30 @@
      * this object for different data w/ the same H.
      */
     void reset() {
-        state0 = 0;
-        state1 = 0;
+        state[0] = 0;
+        state[1] = 0;
     }
 
     /**
      * Save the current snapshot of this GHASH object.
      */
     void save() {
-        stateSave0 = state0;
-        stateSave1 = state1;
+        stateSave0 = state[0];
+        stateSave1 = state[1];
     }
 
     /**
      * Restores this object using the saved snapshot.
      */
     void restore() {
-        state0 = stateSave0;
-        state1 = stateSave1;
+        state[0] = stateSave0;
+        state[1] = stateSave1;
     }
 
-    private void processBlock(byte[] data, int ofs) {
-        if (data.length - ofs < AES_BLOCK_SIZE) {
-            throw new RuntimeException("need complete block");
-        }
-        state0 ^= getLong(data, ofs);
-        state1 ^= getLong(data, ofs + 8);
-        blockMult(subkeyH0, subkeyH1);
+    private static void processBlock(byte[] data, int ofs, long[] st, long[] subH) {
+        st[0] ^= getLong(data, ofs);
+        st[1] ^= getLong(data, ofs + 8);
+        blockMult(st, subH);
     }
 
     void update(byte[] in) {
@@ -185,22 +189,57 @@
     }
 
     void update(byte[] in, int inOfs, int inLen) {
-        if (inLen - inOfs > in.length) {
-            throw new RuntimeException("input length out of bound");
+        if (inLen == 0) {
+            return;
+        }
+        ghashRangeCheck(in, inOfs, inLen, state, subkeyH);
+        processBlocks(in, inOfs, inLen/AES_BLOCK_SIZE, state, subkeyH);
+    }
+
+    private static void ghashRangeCheck(byte[] in, int inOfs, int inLen, long[] st, long[] subH) {
+        if (inLen < 0) {
+            throw new RuntimeException("invalid input length: " + inLen);
+        }
+        if (inOfs < 0) {
+            throw new RuntimeException("invalid offset: " + inOfs);
+        }
+        if (inLen > in.length - inOfs) {
+            throw new RuntimeException("input length out of bound: " +
+                                       inLen + " > " + (in.length - inOfs));
         }
         if (inLen % AES_BLOCK_SIZE != 0) {
-            throw new RuntimeException("input length unsupported");
+            throw new RuntimeException("input length/block size mismatch: " +
+                                       inLen);
         }
 
-        for (int i = inOfs; i < (inOfs + inLen); i += AES_BLOCK_SIZE) {
-            processBlock(in, i);
+        // These two checks are for C2 checking
+        if (st.length != 2) {
+            throw new RuntimeException("internal state has invalid length: " +
+                                       st.length);
+        }
+        if (subH.length != 2) {
+            throw new RuntimeException("internal subkeyH has invalid length: " +
+                                       subH.length);
+        }
+    }
+    /*
+     * This is an intrinsified method.  The method's argument list must match
+     * the hotspot signature.  This method and methods called by it, cannot
+     * throw exceptions or allocate arrays, as doing so will break intrinsics.
+     */
+    private static void processBlocks(byte[] data, int inOfs, int blocks, long[] st, long[] subH) {
+        int offset = inOfs;
+        while (blocks > 0) {
+            processBlock(data, offset, st, subH);
+            blocks--;
+            offset += AES_BLOCK_SIZE;
         }
     }
 
     byte[] digest() {
         byte[] result = new byte[AES_BLOCK_SIZE];
-        putLong(result, 0, state0);
-        putLong(result, 8, state1);
+        putLong(result, 0, state[0]);
+        putLong(result, 8, state[1]);
         reset();
         return result;
     }
--- a/jdk/test/com/sun/crypto/provider/Cipher/AES/TestGHASH.java	Fri Jun 12 09:59:30 2015 -0700
+++ b/jdk/test/com/sun/crypto/provider/Cipher/AES/TestGHASH.java	Wed Jun 17 17:41:04 2015 -0700
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2015, Red Hat, Inc.
+ * Copyright (c) 2015, Oracle, Inc.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -24,7 +25,14 @@
 /*
  * @test
  * @bug 8069072
- * @summary Test vectors for com.sun.crypto.provider.GHASH
+ * @summary Test vectors for com.sun.crypto.provider.GHASH.
+ *
+ * Single iteration to verify software-only GHASH algorithm.
+ * @run main TestGHASH
+ *
+ * Multi-iteration to verify the GHASH intrinsics, if available.
+ * Many iterations are needed so we are sure hotspot will use the intrinsic.
+ * @run main TestGHASH -n 10000
  */
 import java.lang.reflect.Constructor;
 import java.lang.reflect.Method;
@@ -124,43 +132,55 @@
 
     public static void main(String[] args) throws Exception {
         TestGHASH test;
-        if (args.length == 0) {
-            test = new TestGHASH("com.sun.crypto.provider.GHASH");
-        } else {
-            test = new TestGHASH(args[0]);
+        String test_class = "com.sun.crypto.provider.GHASH";
+        int i = 0;
+        int num_of_loops = 1;
+        while (args.length > i) {
+            if (args[i].compareTo("-c") == 0) {
+                test_class = args[++i];
+            } else if (args[i].compareTo("-n") == 0) {
+                num_of_loops = Integer.parseInt(args[++i]);
+            }
+            i++;
         }
 
-        // Test vectors from David A. McGrew, John Viega,
-        // "The Galois/Counter Mode of Operation (GCM)", 2005.
-        // <http://csrc.nist.gov/groups/ST/toolkit/BCM/documents/proposedmodes/gcm/gcm-revised-spec.pdf>
+        System.out.println("Running " + num_of_loops + " iterations.");
+        test = new TestGHASH(test_class);
+        i = 0;
 
-        test.check(1, "66e94bd4ef8a2c3b884cfa59ca342b2e", "", "",
-                "00000000000000000000000000000000");
-        test.check(2,
-                "66e94bd4ef8a2c3b884cfa59ca342b2e", "",
-                "0388dace60b6a392f328c2b971b2fe78",
-                "f38cbb1ad69223dcc3457ae5b6b0f885");
-        test.check(3,
-                "b83b533708bf535d0aa6e52980d53b78", "",
-                "42831ec2217774244b7221b784d0d49c" +
-                "e3aa212f2c02a4e035c17e2329aca12e" +
-                "21d514b25466931c7d8f6a5aac84aa05" +
-                "1ba30b396a0aac973d58e091473f5985",
-                "7f1b32b81b820d02614f8895ac1d4eac");
-        test.check(4,
-                "b83b533708bf535d0aa6e52980d53b78",
-                "feedfacedeadbeeffeedfacedeadbeef" + "abaddad2",
-                "42831ec2217774244b7221b784d0d49c" +
-                "e3aa212f2c02a4e035c17e2329aca12e" +
-                "21d514b25466931c7d8f6a5aac84aa05" +
-                "1ba30b396a0aac973d58e091",
-                "698e57f70e6ecc7fd9463b7260a9ae5f");
-        test.check(5, "b83b533708bf535d0aa6e52980d53b78",
-                "feedfacedeadbeeffeedfacedeadbeef" + "abaddad2",
-                "61353b4c2806934a777ff51fa22a4755" +
-                "699b2a714fcdc6f83766e5f97b6c7423" +
-                "73806900e49f24b22b097544d4896b42" +
-                "4989b5e1ebac0f07c23f4598",
-                "df586bb4c249b92cb6922877e444d37b");
+        while (num_of_loops > i) {
+            // Test vectors from David A. McGrew, John Viega,
+            // "The Galois/Counter Mode of Operation (GCM)", 2005.
+            // <http://csrc.nist.gov/groups/ST/toolkit/BCM/documents/proposedmodes/gcm/gcm-revised-spec.pdf>
+            test.check(1, "66e94bd4ef8a2c3b884cfa59ca342b2e", "", "",
+                       "00000000000000000000000000000000");
+            test.check(2,
+                       "66e94bd4ef8a2c3b884cfa59ca342b2e", "",
+                       "0388dace60b6a392f328c2b971b2fe78",
+                       "f38cbb1ad69223dcc3457ae5b6b0f885");
+            test.check(3,
+                       "b83b533708bf535d0aa6e52980d53b78", "",
+                       "42831ec2217774244b7221b784d0d49c" +
+                       "e3aa212f2c02a4e035c17e2329aca12e" +
+                       "21d514b25466931c7d8f6a5aac84aa05" +
+                       "1ba30b396a0aac973d58e091473f5985",
+                       "7f1b32b81b820d02614f8895ac1d4eac");
+            test.check(4,
+                       "b83b533708bf535d0aa6e52980d53b78",
+                       "feedfacedeadbeeffeedfacedeadbeef" + "abaddad2",
+                       "42831ec2217774244b7221b784d0d49c" +
+                       "e3aa212f2c02a4e035c17e2329aca12e" +
+                       "21d514b25466931c7d8f6a5aac84aa05" +
+                       "1ba30b396a0aac973d58e091",
+                       "698e57f70e6ecc7fd9463b7260a9ae5f");
+            test.check(5, "b83b533708bf535d0aa6e52980d53b78",
+                       "feedfacedeadbeeffeedfacedeadbeef" + "abaddad2",
+                       "61353b4c2806934a777ff51fa22a4755" +
+                       "699b2a714fcdc6f83766e5f97b6c7423" +
+                       "73806900e49f24b22b097544d4896b42" +
+                       "4989b5e1ebac0f07c23f4598",
+                       "df586bb4c249b92cb6922877e444d37b");
+            i++;
+        }
     }
 }