8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions
author srukmannagar
Fri, 16 Aug 2019 14:42:50 -0700
changeset 57786 948ac3112da8
parent 57785 8d9362f3b8aa
child 57787 094ef5a91b68
8225625: AES Electronic Codebook (ECB) encryption and decryption optimization using AVX512 + VAES instructions Summary: AES-ECB encryption and decryption optimization for x86_64 architectures supporting AVX3+VAES Reviewed-by: kvn, valeriep Contributed-by: shravya.rukmannagari@intel.com, smita.kamath@intel.com
src/hotspot/cpu/x86/assembler_x86.cpp
src/hotspot/cpu/x86/assembler_x86.hpp
src/hotspot/cpu/x86/macroAssembler_x86.hpp
src/hotspot/cpu/x86/macroAssembler_x86_aes.cpp
src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
src/hotspot/share/aot/aotCodeHeap.cpp
src/hotspot/share/classfile/vmSymbols.cpp
src/hotspot/share/classfile/vmSymbols.hpp
src/hotspot/share/jvmci/vmStructs_jvmci.cpp
src/hotspot/share/opto/c2compiler.cpp
src/hotspot/share/opto/escape.cpp
src/hotspot/share/opto/library_call.cpp
src/hotspot/share/opto/runtime.cpp
src/hotspot/share/opto/runtime.hpp
src/hotspot/share/runtime/stubRoutines.cpp
src/hotspot/share/runtime/stubRoutines.hpp
src/hotspot/share/runtime/vmStructs.cpp
src/java.base/share/classes/com/sun/crypto/provider/ElectronicCodeBook.java
src/jdk.aot/share/classes/jdk.tools.jaotc.binformat/src/jdk/tools/jaotc/binformat/BinaryContainer.java
test/micro/org/openjdk/bench/javax/crypto/full/AESBench.java
--- a/src/hotspot/cpu/x86/assembler_x86.cpp	Fri Aug 16 14:46:52 2019 -0400
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp	Fri Aug 16 14:42:50 2019 -0700
@@ -1380,6 +1380,15 @@
   emit_int8(0xC0 | encode);
 }
 
+void Assembler::vaesenc(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {  // register form, opcode 0xDC in the 66/0F38 map
+  assert(VM_Version::supports_vaes(), "requires vaes support/enabling");
+  InstructionAttr attrs(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  attrs.set_is_evex_instruction();  // mark the instruction as EVEX before the prefix is produced
+  const int reg_bits = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
+  emit_int8((unsigned char)0xDC);               // opcode byte
+  emit_int8((unsigned char)(0xC0 | reg_bits));  // 0xC0 sets the two top bits of the following byte (register-direct operand)
+}
+
 void Assembler::aesenclast(XMMRegister dst, Address src) {
   assert(VM_Version::supports_aes(), "");
   InstructionMark im(this);
@@ -1397,6 +1406,15 @@
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
+void Assembler::vaesenclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {  // register form, opcode 0xDD in the 66/0F38 map
+  assert(VM_Version::supports_vaes(), "requires vaes support/enabling");
+  InstructionAttr attrs(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  attrs.set_is_evex_instruction();  // mark the instruction as EVEX before the prefix is produced
+  const int reg_bits = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
+  emit_int8((unsigned char)0xDD);               // opcode byte
+  emit_int8((unsigned char)(0xC0 | reg_bits));  // 0xC0 sets the two top bits of the following byte (register-direct operand)
+}
+
 void Assembler::andl(Address dst, int32_t imm32) {
   InstructionMark im(this);
   prefix(dst);
--- a/src/hotspot/cpu/x86/assembler_x86.hpp	Fri Aug 16 14:46:52 2019 -0400
+++ b/src/hotspot/cpu/x86/assembler_x86.hpp	Fri Aug 16 14:42:50 2019 -0700
@@ -968,6 +968,9 @@
   void aesenc(XMMRegister dst, XMMRegister src);
   void aesenclast(XMMRegister dst, Address src);
   void aesenclast(XMMRegister dst, XMMRegister src);
+  // Vector AES instructions (register-register forms; vector_len is forwarded to the instruction attributes)
+  void vaesenc(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);      // emits opcode 0xDC
+  void vaesenclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);  // emits opcode 0xDD
   void vaesdec(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
   void vaesdeclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
 
--- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp	Fri Aug 16 14:46:52 2019 -0400
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp	Fri Aug 16 14:42:50 2019 -0700
@@ -982,6 +982,17 @@
                    XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
                    Register buf, Register state, Register ofs, Register limit, Register rsp, bool multi_block,
                    XMMRegister shuf_mask);
+private:  // helpers used by the AES-ECB routines declared below
+  void roundEnc(XMMRegister key, int rnum);      // vaesenc with 'key' on XMM registers 0..rnum (512-bit)
+  void lastroundEnc(XMMRegister key, int rnum);  // vaesenclast with 'key' on XMM registers 0..rnum (512-bit)
+  void roundDec(XMMRegister key, int rnum);      // vaesdec with 'key' on XMM registers 0..rnum (512-bit)
+  void lastroundDec(XMMRegister key, int rnum);  // vaesdeclast with 'key' on XMM registers 0..rnum (512-bit)
+  void ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask);  // load + shuffle one 16-byte round key
+
+public:  // AES-ECB entry points; both take source address, destination address, key array address and byte length
+  void aesecb_encrypt(Register source_addr, Register dest_addr, Register key, Register len);
+  void aesecb_decrypt(Register source_addr, Register dest_addr, Register key, Register len);
+
 #endif
 
   void fast_sha1(XMMRegister abcd, XMMRegister e0, XMMRegister e1, XMMRegister msg0,
--- a/src/hotspot/cpu/x86/macroAssembler_x86_aes.cpp	Fri Aug 16 14:46:52 2019 -0400
+++ b/src/hotspot/cpu/x86/macroAssembler_x86_aes.cpp	Fri Aug 16 14:42:50 2019 -0700
@@ -30,6 +30,463 @@
 #include "macroAssembler_x86.hpp"
 
 #ifdef _LP64
+
+void MacroAssembler::roundEnc(XMMRegister key, int rnum) {  // one 512-bit vaesenc with 'key' on each of registers 0..rnum, ascending
+    for (int idx = 0; idx <= rnum; ++idx) {
+      vaesenc(as_XMMRegister(idx), as_XMMRegister(idx), key, Assembler::AVX_512bit);
+    }
+}
+
+void MacroAssembler::lastroundEnc(XMMRegister key, int rnum) {  // one 512-bit vaesenclast with 'key' on each of registers 0..rnum, ascending
+    for (int idx = 0; idx <= rnum; ++idx) {
+      vaesenclast(as_XMMRegister(idx), as_XMMRegister(idx), key, Assembler::AVX_512bit);
+    }
+}
+
+void MacroAssembler::roundDec(XMMRegister key, int rnum) {  // one 512-bit vaesdec with 'key' on each of registers 0..rnum, ascending
+    for (int idx = 0; idx <= rnum; ++idx) {
+      vaesdec(as_XMMRegister(idx), as_XMMRegister(idx), key, Assembler::AVX_512bit);
+    }
+}
+
+void MacroAssembler::lastroundDec(XMMRegister key, int rnum) {  // one 512-bit vaesdeclast with 'key' on each of registers 0..rnum, ascending
+    for (int idx = 0; idx <= rnum; ++idx) {
+      vaesdeclast(as_XMMRegister(idx), as_XMMRegister(idx), key, Assembler::AVX_512bit);
+    }
+}
+
+// Load the 16 bytes at [key + offset] into xmmdst, shuffle them with the key shuffle mask, then run a 512-bit evshufi64x2 (imm 0x0) on the result
+void MacroAssembler::ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
+    movdqu(xmmdst, Address(key, offset));
+    if (xmm_shuf_mask == NULL) {
+        pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));  // no mask register given: use the mask from memory
+    } else {
+        pshufb(xmmdst, xmm_shuf_mask);  // caller already holds the mask in a register
+    }
+    evshufi64x2(xmmdst, xmmdst, xmmdst, 0x0, Assembler::AVX_512bit);  // presumably replicates the low 128-bit lane across the register -- confirm against the ISA reference
+}
+
+// AES-ECB Encrypt Operation: emits code that encrypts the data at src_addr into dest_addr using the expanded key array at key
+void MacroAssembler::aesecb_encrypt(Register src_addr, Register dest_addr, Register key, Register len) {
+
+    const Register pos = rax;     // running byte offset applied to both src_addr and dest_addr
+    const Register rounds = r12;  // int length of the expanded key array (44, 52 or 60), loaded below
+
+    Label NO_PARTS, LOOP, Loop_start, LOOP2, AES192, END_LOOP, AES256, REMAINDER, LAST2, END, KEY_192, KEY_256, EXIT;
+    push(r13);  // r13 and r12 are used as scratch below and popped again at EXIT
+    push(r12);
+
+    // When AVX512VL and AVX512BW are both supported, set opmask register k1 to 0xffff.
+    // NOTE(review): the previous wording of this comment said every instruction below uses 128-bit mode, but
+    // this routine also emits 512-bit instructions, and no call below passes k1 explicitly -- confirm the intent.
+    if (VM_Version::supports_avx512vlbw()) {
+       movl(rax, 0xffff);
+       kmovql(k1, rax);
+    }
+    push(len); // Save the incoming byte length; it is popped into rax at EXIT
+    push(rbx);
+
+    vzeroupper();
+
+    xorptr(pos, pos);  // pos = 0
+
+    // Read the expanded key array's length, which identifies the key size: 44 for 10 rounds (128), 52 for 12 rounds (192), 60 for 14 rounds (256)
+    movl(rounds, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+    // Load Key shuf mask
+    const XMMRegister xmm_key_shuf_mask = xmm31;  // used temporarily to swap key bytes up front
+    movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+
+    // Load and shuffle round keys 0-10, which are needed for every key size
+    ev_load_key(xmm8, key, 0 * 16, xmm_key_shuf_mask);
+    ev_load_key(xmm9, key, 1 * 16, xmm_key_shuf_mask);
+    ev_load_key(xmm10, key, 2 * 16, xmm_key_shuf_mask);
+    ev_load_key(xmm23, key, 3 * 16, xmm_key_shuf_mask);
+    ev_load_key(xmm12, key, 4 * 16, xmm_key_shuf_mask);
+    ev_load_key(xmm13, key, 5 * 16, xmm_key_shuf_mask);
+    ev_load_key(xmm14, key, 6 * 16, xmm_key_shuf_mask);
+    ev_load_key(xmm15, key, 7 * 16, xmm_key_shuf_mask);
+    ev_load_key(xmm16, key, 8 * 16, xmm_key_shuf_mask);
+    ev_load_key(xmm17, key, 9 * 16, xmm_key_shuf_mask);
+    ev_load_key(xmm24, key, 10 * 16, xmm_key_shuf_mask);
+    cmpl(rounds, 52);
+    jcc(Assembler::greaterEqual, KEY_192);
+    jmp(Loop_start);
+
+    bind(KEY_192);  // key length >= 52: also load round keys 11 and 12
+    ev_load_key(xmm19, key, 11 * 16, xmm_key_shuf_mask);
+    ev_load_key(xmm20, key, 12 * 16, xmm_key_shuf_mask);
+    cmpl(rounds, 60);
+    jcc(Assembler::equal, KEY_256);
+    jmp(Loop_start);
+
+    bind(KEY_256);  // key length == 60: also load round keys 13 and 14
+    ev_load_key(xmm21, key, 13 * 16, xmm_key_shuf_mask);
+    ev_load_key(xmm22, key, 14 * 16, xmm_key_shuf_mask);
+
+    bind(Loop_start);
+    movq(rbx, len);
+    // Divide length by 16 to convert it to number of blocks
+    shrq(len, 4);
+    shlq(rbx, 60);  // keeps only the low 4 bits of the byte length; the result is zero when the length is a multiple of 16
+    jcc(Assembler::equal, NO_PARTS);
+    addq(len, 1);   // a trailing partial block is counted as one more block (LOOP2 below always moves a full 16 bytes)
+    // Check if number of blocks is greater than or equal to 32
+    // If true, 512 bytes are processed at a time (code marked by label LOOP)
+    // If not, 16 bytes are processed (code marked by REMAINDER label)
+    bind(NO_PARTS);
+    movq(rbx, len);  // rbx = total number of 16-byte blocks
+    shrq(len, 5);    // len = number of 32-block (512-byte) chunks
+    jcc(Assembler::equal, REMAINDER);
+    movl(r13, len);
+    // Compute number of blocks that will be processed 512 bytes at a time
+    // Subtract this from the total number of blocks which will then be processed by REMAINDER loop
+    shlq(r13, 5);
+    subq(rbx, r13);  // rbx = blocks left over for the 16-byte loop
+    // Begin processing 512 bytes
+    bind(LOOP);
+    // Move 64 bytes of PT data into a zmm register, as a result 512 bytes of PT loaded in zmm0-7
+    evmovdquq(xmm0, Address(src_addr, pos, Address::times_1, 0 * 64), Assembler::AVX_512bit);
+    evmovdquq(xmm1, Address(src_addr, pos, Address::times_1, 1 * 64), Assembler::AVX_512bit);
+    evmovdquq(xmm2, Address(src_addr, pos, Address::times_1, 2 * 64), Assembler::AVX_512bit);
+    evmovdquq(xmm3, Address(src_addr, pos, Address::times_1, 3 * 64), Assembler::AVX_512bit);
+    evmovdquq(xmm4, Address(src_addr, pos, Address::times_1, 4 * 64), Assembler::AVX_512bit);
+    evmovdquq(xmm5, Address(src_addr, pos, Address::times_1, 5 * 64), Assembler::AVX_512bit);
+    evmovdquq(xmm6, Address(src_addr, pos, Address::times_1, 6 * 64), Assembler::AVX_512bit);
+    evmovdquq(xmm7, Address(src_addr, pos, Address::times_1, 7 * 64), Assembler::AVX_512bit);
+    // Xor with the first round key
+    evpxorq(xmm0, xmm0, xmm8, Assembler::AVX_512bit);
+    evpxorq(xmm1, xmm1, xmm8, Assembler::AVX_512bit);
+    evpxorq(xmm2, xmm2, xmm8, Assembler::AVX_512bit);
+    evpxorq(xmm3, xmm3, xmm8, Assembler::AVX_512bit);
+    evpxorq(xmm4, xmm4, xmm8, Assembler::AVX_512bit);
+    evpxorq(xmm5, xmm5, xmm8, Assembler::AVX_512bit);
+    evpxorq(xmm6, xmm6, xmm8, Assembler::AVX_512bit);
+    evpxorq(xmm7, xmm7, xmm8, Assembler::AVX_512bit);
+    // 9 Aesenc round operations (round keys 1-9) on zmm0-7
+    roundEnc(xmm9,  7);
+    roundEnc(xmm10, 7);
+    roundEnc(xmm23, 7);
+    roundEnc(xmm12, 7);
+    roundEnc(xmm13, 7);
+    roundEnc(xmm14, 7);
+    roundEnc(xmm15, 7);
+    roundEnc(xmm16, 7);
+    roundEnc(xmm17, 7);
+    cmpl(rounds, 52);
+    jcc(Assembler::aboveEqual, AES192);
+    // Aesenclast round operation for keysize = 128
+    lastroundEnc(xmm24, 7);
+    jmp(END_LOOP);
+    // Additional 2 rounds of Aesenc operation for keysize = 192
+    bind(AES192);
+    roundEnc(xmm24, 7);
+    roundEnc(xmm19, 7);
+    cmpl(rounds, 60);
+    jcc(Assembler::aboveEqual, AES256);
+    // Aesenclast round for keysize = 192
+    lastroundEnc(xmm20, 7);
+    jmp(END_LOOP);
+    // 2 rounds of Aesenc operation and Aesenclast for keysize = 256
+    bind(AES256);
+    roundEnc(xmm20, 7);
+    roundEnc(xmm21, 7);
+    lastroundEnc(xmm22, 7);
+
+    bind(END_LOOP);
+    // Move 512 bytes of CT to destination
+    evmovdquq(Address(dest_addr, pos, Address::times_1, 0 * 64), xmm0, Assembler::AVX_512bit);
+    evmovdquq(Address(dest_addr, pos, Address::times_1, 1 * 64), xmm1, Assembler::AVX_512bit);
+    evmovdquq(Address(dest_addr, pos, Address::times_1, 2 * 64), xmm2, Assembler::AVX_512bit);
+    evmovdquq(Address(dest_addr, pos, Address::times_1, 3 * 64), xmm3, Assembler::AVX_512bit);
+    evmovdquq(Address(dest_addr, pos, Address::times_1, 4 * 64), xmm4, Assembler::AVX_512bit);
+    evmovdquq(Address(dest_addr, pos, Address::times_1, 5 * 64), xmm5, Assembler::AVX_512bit);
+    evmovdquq(Address(dest_addr, pos, Address::times_1, 6 * 64), xmm6, Assembler::AVX_512bit);
+    evmovdquq(Address(dest_addr, pos, Address::times_1, 7 * 64), xmm7, Assembler::AVX_512bit);
+
+    addq(pos, 512);
+    decq(len);  // one 512-byte chunk done
+    jcc(Assembler::notEqual, LOOP);
+
+    bind(REMAINDER);
+    vzeroupper();
+    cmpq(rbx, 0);  // no leftover blocks: skip the 16-byte loop
+    jcc(Assembler::equal, END);
+    // Process 16 bytes at a time
+    bind(LOOP2);
+    movdqu(xmm1, Address(src_addr, pos, Address::times_1, 0));
+    vpxor(xmm1, xmm1, xmm8, Assembler::AVX_128bit);
+    // xmm2 contains shuffled key for Aesenclast operation; round key 10 by default, replaced below for longer keys
+    vmovdqu(xmm2, xmm24);
+
+    vaesenc(xmm1, xmm1, xmm9, Assembler::AVX_128bit);
+    vaesenc(xmm1, xmm1, xmm10, Assembler::AVX_128bit);
+    vaesenc(xmm1, xmm1, xmm23, Assembler::AVX_128bit);
+    vaesenc(xmm1, xmm1, xmm12, Assembler::AVX_128bit);
+    vaesenc(xmm1, xmm1, xmm13, Assembler::AVX_128bit);
+    vaesenc(xmm1, xmm1, xmm14, Assembler::AVX_128bit);
+    vaesenc(xmm1, xmm1, xmm15, Assembler::AVX_128bit);
+    vaesenc(xmm1, xmm1, xmm16, Assembler::AVX_128bit);
+    vaesenc(xmm1, xmm1, xmm17, Assembler::AVX_128bit);
+
+    cmpl(rounds, 52);
+    jcc(Assembler::below, LAST2);
+    vmovdqu(xmm2, xmm20);  // key length >= 52: the last-round key becomes round key 12
+    vaesenc(xmm1, xmm1, xmm24, Assembler::AVX_128bit);
+    vaesenc(xmm1, xmm1, xmm19, Assembler::AVX_128bit);
+    cmpl(rounds, 60);
+    jcc(Assembler::below, LAST2);
+    vmovdqu(xmm2, xmm22);  // key length >= 60: the last-round key becomes round key 14
+    vaesenc(xmm1, xmm1, xmm20, Assembler::AVX_128bit);
+    vaesenc(xmm1, xmm1, xmm21, Assembler::AVX_128bit);
+
+    bind(LAST2);
+    // Aesenclast round
+    vaesenclast(xmm1, xmm1, xmm2, Assembler::AVX_128bit);
+    // Write 16 bytes of CT to destination
+    movdqu(Address(dest_addr, pos, Address::times_1, 0), xmm1);
+    addq(pos, 16);
+    decq(rbx);  // one 16-byte block done
+    jcc(Assembler::notEqual, LOOP2);
+
+    bind(END);
+    // Zero out the round keys
+    evpxorq(xmm8, xmm8, xmm8, Assembler::AVX_512bit);
+    evpxorq(xmm9, xmm9, xmm9, Assembler::AVX_512bit);
+    evpxorq(xmm10, xmm10, xmm10, Assembler::AVX_512bit);
+    evpxorq(xmm23, xmm23, xmm23, Assembler::AVX_512bit);
+    evpxorq(xmm12, xmm12, xmm12, Assembler::AVX_512bit);
+    evpxorq(xmm13, xmm13, xmm13, Assembler::AVX_512bit);
+    evpxorq(xmm14, xmm14, xmm14, Assembler::AVX_512bit);
+    evpxorq(xmm15, xmm15, xmm15, Assembler::AVX_512bit);
+    evpxorq(xmm16, xmm16, xmm16, Assembler::AVX_512bit);
+    evpxorq(xmm17, xmm17, xmm17, Assembler::AVX_512bit);
+    evpxorq(xmm24, xmm24, xmm24, Assembler::AVX_512bit);
+    cmpl(rounds, 44);  // key length 44: round keys 11-14 were never loaded
+    jcc(Assembler::belowEqual, EXIT);
+    evpxorq(xmm19, xmm19, xmm19, Assembler::AVX_512bit);
+    evpxorq(xmm20, xmm20, xmm20, Assembler::AVX_512bit);
+    cmpl(rounds, 52);  // key length 52: round keys 13-14 were never loaded
+    jcc(Assembler::belowEqual, EXIT);
+    evpxorq(xmm21, xmm21, xmm21, Assembler::AVX_512bit);
+    evpxorq(xmm22, xmm22, xmm22, Assembler::AVX_512bit);
+    bind(EXIT);
+    pop(rbx);
+    pop(rax); // return length (the value of len saved by push(len) above)
+    pop(r12);
+    pop(r13);
+}
+
+// AES-ECB Decrypt Operation: emits code that decrypts the data at src_addr into dest_addr using the expanded key array at key
+void MacroAssembler::aesecb_decrypt(Register src_addr, Register dest_addr, Register key, Register len)  {
+
+    Label NO_PARTS, LOOP, Loop_start, LOOP2, AES192, END_LOOP, AES256, REMAINDER, LAST2, END, KEY_192, KEY_256, EXIT;
+    const Register pos = rax;     // running byte offset applied to both src_addr and dest_addr
+    const Register rounds = r12;  // int length of the expanded key array (44, 52 or 60), loaded below
+    push(r13);  // r13 and r12 are used as scratch below and popped again at EXIT
+    push(r12);
+
+    // When AVX512VL and AVX512BW are both supported, set opmask register k1 to 0xffff.
+    // NOTE(review): the previous wording of this comment said every instruction below uses 128-bit mode, but
+    // this routine also emits 512-bit instructions, and no call below passes k1 explicitly -- confirm the intent.
+    if (VM_Version::supports_avx512vlbw()) {
+       movl(rax, 0xffff);
+       kmovql(k1, rax);
+    }
+
+    push(len); // Save the incoming byte length; it is popped into rax at EXIT
+    push(rbx);
+
+    vzeroupper();
+
+    xorptr(pos, pos);  // pos = 0
+    // Read the expanded key array's length, which identifies the key size: 44 for 10 rounds (128), 52 for 12 rounds (192), 60 for 14 rounds (256)
+    movl(rounds, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+    // Load Key shuf mask
+    const XMMRegister xmm_key_shuf_mask = xmm31;  // used temporarily to swap key bytes up front
+    movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+
+    // Load and shuffle round keys. The java expanded key ordering is rotated one position in decryption.
+    // So the first round key is loaded from 1*16 here and last round key is loaded from 0*16
+    ev_load_key(xmm9,  key, 1 * 16, xmm_key_shuf_mask);
+    ev_load_key(xmm10, key, 2 * 16, xmm_key_shuf_mask);
+    ev_load_key(xmm11, key, 3 * 16, xmm_key_shuf_mask);
+    ev_load_key(xmm12, key, 4 * 16, xmm_key_shuf_mask);
+    ev_load_key(xmm13, key, 5 * 16, xmm_key_shuf_mask);
+    ev_load_key(xmm14, key, 6 * 16, xmm_key_shuf_mask);
+    ev_load_key(xmm15, key, 7 * 16, xmm_key_shuf_mask);
+    ev_load_key(xmm16, key, 8 * 16, xmm_key_shuf_mask);
+    ev_load_key(xmm17, key, 9 * 16, xmm_key_shuf_mask);
+    ev_load_key(xmm18, key, 10 * 16, xmm_key_shuf_mask);
+    ev_load_key(xmm27, key, 0 * 16, xmm_key_shuf_mask);
+    cmpl(rounds, 52);
+    jcc(Assembler::greaterEqual, KEY_192);
+    jmp(Loop_start);
+
+    bind(KEY_192);  // key length >= 52: also load the keys at 11*16 and 12*16
+    ev_load_key(xmm19, key, 11 * 16, xmm_key_shuf_mask);
+    ev_load_key(xmm20, key, 12 * 16, xmm_key_shuf_mask);
+    cmpl(rounds, 60);
+    jcc(Assembler::equal, KEY_256);
+    jmp(Loop_start);
+
+    bind(KEY_256);  // key length == 60: also load the keys at 13*16 and 14*16
+    ev_load_key(xmm21, key, 13 * 16, xmm_key_shuf_mask);
+    ev_load_key(xmm22, key, 14 * 16, xmm_key_shuf_mask);
+    bind(Loop_start);
+    movq(rbx, len);
+    // Convert input length to number of blocks
+    shrq(len, 4);
+    shlq(rbx, 60);  // keeps only the low 4 bits of the byte length; the result is zero when the length is a multiple of 16
+    jcc(Assembler::equal, NO_PARTS);
+    addq(len, 1);   // a trailing partial block is counted as one more block (LOOP2 below always moves a full 16 bytes)
+    // Check if number of blocks is greater than or equal to 32
+    // If true, 512 bytes are processed at a time (code marked by label LOOP)
+    // If not, 16 bytes are processed at a time (code marked by label REMAINDER)
+    bind(NO_PARTS);
+    movq(rbx, len);  // rbx = total number of 16-byte blocks
+    shrq(len, 5);    // len = number of 32-block (512-byte) chunks
+    jcc(Assembler::equal, REMAINDER);
+    movl(r13, len);
+    // Compute number of blocks that will be processed as 512 bytes at a time
+    // Subtract this from the total number of blocks, which will then be processed by REMAINDER loop.
+    shlq(r13, 5);
+    subq(rbx, r13);  // rbx = blocks left over for the 16-byte loop
+
+    bind(LOOP);
+    // Move 64 bytes of CT data into a zmm register, as a result 512 bytes of CT loaded in zmm0-7
+    evmovdquq(xmm0, Address(src_addr, pos, Address::times_1, 0 * 64), Assembler::AVX_512bit);
+    evmovdquq(xmm1, Address(src_addr, pos, Address::times_1, 1 * 64), Assembler::AVX_512bit);
+    evmovdquq(xmm2, Address(src_addr, pos, Address::times_1, 2 * 64), Assembler::AVX_512bit);
+    evmovdquq(xmm3, Address(src_addr, pos, Address::times_1, 3 * 64), Assembler::AVX_512bit);
+    evmovdquq(xmm4, Address(src_addr, pos, Address::times_1, 4 * 64), Assembler::AVX_512bit);
+    evmovdquq(xmm5, Address(src_addr, pos, Address::times_1, 5 * 64), Assembler::AVX_512bit);
+    evmovdquq(xmm6, Address(src_addr, pos, Address::times_1, 6 * 64), Assembler::AVX_512bit);
+    evmovdquq(xmm7, Address(src_addr, pos, Address::times_1, 7 * 64), Assembler::AVX_512bit);
+    // Xor with the first round key
+    evpxorq(xmm0, xmm0, xmm9, Assembler::AVX_512bit);
+    evpxorq(xmm1, xmm1, xmm9, Assembler::AVX_512bit);
+    evpxorq(xmm2, xmm2, xmm9, Assembler::AVX_512bit);
+    evpxorq(xmm3, xmm3, xmm9, Assembler::AVX_512bit);
+    evpxorq(xmm4, xmm4, xmm9, Assembler::AVX_512bit);
+    evpxorq(xmm5, xmm5, xmm9, Assembler::AVX_512bit);
+    evpxorq(xmm6, xmm6, xmm9, Assembler::AVX_512bit);
+    evpxorq(xmm7, xmm7, xmm9, Assembler::AVX_512bit);
+    // 9 rounds of Aesdec on zmm0-7
+    roundDec(xmm10, 7);
+    roundDec(xmm11, 7);
+    roundDec(xmm12, 7);
+    roundDec(xmm13, 7);
+    roundDec(xmm14, 7);
+    roundDec(xmm15, 7);
+    roundDec(xmm16, 7);
+    roundDec(xmm17, 7);
+    roundDec(xmm18, 7);
+    cmpl(rounds, 52);
+    jcc(Assembler::aboveEqual, AES192);
+    // Aesdeclast round for keysize = 128
+    lastroundDec(xmm27, 7);
+    jmp(END_LOOP);
+
+    bind(AES192);
+    // 2 Additional rounds for keysize = 192
+    roundDec(xmm19, 7);
+    roundDec(xmm20, 7);
+    cmpl(rounds, 60);
+    jcc(Assembler::aboveEqual, AES256);
+    // Aesdeclast round for keysize = 192
+    lastroundDec(xmm27, 7);
+    jmp(END_LOOP);
+    bind(AES256);
+    // 2 Additional rounds and Aesdeclast for keysize = 256
+    roundDec(xmm21, 7);
+    roundDec(xmm22, 7);
+    lastroundDec(xmm27, 7);
+
+    bind(END_LOOP);
+    // Write 512 bytes of PT to the destination
+    evmovdquq(Address(dest_addr, pos, Address::times_1, 0 * 64), xmm0, Assembler::AVX_512bit);
+    evmovdquq(Address(dest_addr, pos, Address::times_1, 1 * 64), xmm1, Assembler::AVX_512bit);
+    evmovdquq(Address(dest_addr, pos, Address::times_1, 2 * 64), xmm2, Assembler::AVX_512bit);
+    evmovdquq(Address(dest_addr, pos, Address::times_1, 3 * 64), xmm3, Assembler::AVX_512bit);
+    evmovdquq(Address(dest_addr, pos, Address::times_1, 4 * 64), xmm4, Assembler::AVX_512bit);
+    evmovdquq(Address(dest_addr, pos, Address::times_1, 5 * 64), xmm5, Assembler::AVX_512bit);
+    evmovdquq(Address(dest_addr, pos, Address::times_1, 6 * 64), xmm6, Assembler::AVX_512bit);
+    evmovdquq(Address(dest_addr, pos, Address::times_1, 7 * 64), xmm7, Assembler::AVX_512bit);
+
+    addq(pos, 512);
+    decq(len);  // one 512-byte chunk done
+    jcc(Assembler::notEqual, LOOP);
+
+    bind(REMAINDER);
+    vzeroupper();
+    cmpq(rbx, 0);  // no leftover blocks: skip the 16-byte loop
+    jcc(Assembler::equal, END);
+    // Process 16 bytes at a time
+    bind(LOOP2);
+    movdqu(xmm1, Address(src_addr, pos, Address::times_1, 0));
+    vpxor(xmm1, xmm1, xmm9, Assembler::AVX_128bit);
+    // xmm2 contains shuffled key for Aesdeclast operation (the key loaded from 0*16, for every key size).
+    vmovdqu(xmm2, xmm27);
+
+    vaesdec(xmm1, xmm1, xmm10, Assembler::AVX_128bit);
+    vaesdec(xmm1, xmm1, xmm11, Assembler::AVX_128bit);
+    vaesdec(xmm1, xmm1, xmm12, Assembler::AVX_128bit);
+    vaesdec(xmm1, xmm1, xmm13, Assembler::AVX_128bit);
+    vaesdec(xmm1, xmm1, xmm14, Assembler::AVX_128bit);
+    vaesdec(xmm1, xmm1, xmm15, Assembler::AVX_128bit);
+    vaesdec(xmm1, xmm1, xmm16, Assembler::AVX_128bit);
+    vaesdec(xmm1, xmm1, xmm17, Assembler::AVX_128bit);
+    vaesdec(xmm1, xmm1, xmm18, Assembler::AVX_128bit);
+
+    cmpl(rounds, 52);
+    jcc(Assembler::below, LAST2);
+    vaesdec(xmm1, xmm1, xmm19, Assembler::AVX_128bit);  // key length >= 52: two more rounds
+    vaesdec(xmm1, xmm1, xmm20, Assembler::AVX_128bit);
+    cmpl(rounds, 60);
+    jcc(Assembler::below, LAST2);
+    vaesdec(xmm1, xmm1, xmm21, Assembler::AVX_128bit);  // key length >= 60: two more rounds
+    vaesdec(xmm1, xmm1, xmm22, Assembler::AVX_128bit);
+
+    bind(LAST2);
+    // Aesdeclast round
+    vaesdeclast(xmm1, xmm1, xmm2, Assembler::AVX_128bit);
+    // Write 16 bytes of PT to destination
+    movdqu(Address(dest_addr, pos, Address::times_1, 0), xmm1);
+    addq(pos, 16);
+    decq(rbx);  // one 16-byte block done
+    jcc(Assembler::notEqual, LOOP2);
+
+    bind(END);
+    // Zero out the round keys
+    evpxorq(xmm8, xmm8, xmm8, Assembler::AVX_512bit);  // NOTE(review): xmm8 is never loaded with a key in this routine
+    evpxorq(xmm9, xmm9, xmm9, Assembler::AVX_512bit);
+    evpxorq(xmm10, xmm10, xmm10, Assembler::AVX_512bit);
+    evpxorq(xmm11, xmm11, xmm11, Assembler::AVX_512bit);
+    evpxorq(xmm12, xmm12, xmm12, Assembler::AVX_512bit);
+    evpxorq(xmm13, xmm13, xmm13, Assembler::AVX_512bit);
+    evpxorq(xmm14, xmm14, xmm14, Assembler::AVX_512bit);
+    evpxorq(xmm15, xmm15, xmm15, Assembler::AVX_512bit);
+    evpxorq(xmm16, xmm16, xmm16, Assembler::AVX_512bit);
+    evpxorq(xmm17, xmm17, xmm17, Assembler::AVX_512bit);
+    evpxorq(xmm18, xmm18, xmm18, Assembler::AVX_512bit);
+    evpxorq(xmm27, xmm27, xmm27, Assembler::AVX_512bit);
+    cmpl(rounds, 44);  // key length 44: xmm19-22 were never loaded
+    jcc(Assembler::belowEqual, EXIT);
+    evpxorq(xmm19, xmm19, xmm19, Assembler::AVX_512bit);
+    evpxorq(xmm20, xmm20, xmm20, Assembler::AVX_512bit);
+    cmpl(rounds, 52);  // key length 52: xmm21-22 were never loaded
+    jcc(Assembler::belowEqual, EXIT);
+    evpxorq(xmm21, xmm21, xmm21, Assembler::AVX_512bit);
+    evpxorq(xmm22, xmm22, xmm22, Assembler::AVX_512bit);
+    bind(EXIT);
+    pop(rbx);
+    pop(rax); // return length (the value of len saved by push(len) above)
+    pop(r12);
+    pop(r13);
+}
+
 // Multiply 128 x 128 bits, using 4 pclmulqdq operations
 void MacroAssembler::schoolbookAAD(int i, Register htbl, XMMRegister data,
     XMMRegister tmp0, XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3) {
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp	Fri Aug 16 14:46:52 2019 -0400
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp	Fri Aug 16 14:42:50 2019 -0700
@@ -3685,6 +3685,36 @@
     return start;
 }
 
+  address generate_electronicCodeBook_encryptAESCrypt() {  // emits the AES-ECB encryption stub and returns its entry address
+    const Register src_reg = c_rarg0;  // arg 0: source array address
+    const Register dst_reg = c_rarg1;  // arg 1: destination array address
+    const Register key_reg = c_rarg2;  // arg 2: key array address
+    const Register len_reg = c_rarg3;  // arg 3: src len (must be multiple of blocksize 16)
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "electronicCodeBook_encryptAESCrypt");
+    address entry = __ pc();
+    __ enter();  // required for proper stackwalking of RuntimeStub frame
+    __ aesecb_encrypt(src_reg, dst_reg, key_reg, len_reg);
+    __ leave();  // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+    return entry;
+  }
+
+  address generate_electronicCodeBook_decryptAESCrypt() {  // emits the AES-ECB decryption stub and returns its entry address
+    const Register src_reg = c_rarg0;  // arg 0: source array address
+    const Register dst_reg = c_rarg1;  // arg 1: destination array address
+    const Register key_reg = c_rarg2;  // arg 2: key array address
+    const Register len_reg = c_rarg3;  // arg 3: src len (must be multiple of blocksize 16)
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "electronicCodeBook_decryptAESCrypt");
+    address entry = __ pc();
+    __ enter();  // required for proper stackwalking of RuntimeStub frame
+    __ aesecb_decrypt(src_reg, dst_reg, key_reg, len_reg);
+    __ leave();  // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+    return entry;
+  }
+
   address generate_upper_word_mask() {
     __ align(64);
     StubCodeMark mark(this, "StubRoutines", "upper_word_mask");
@@ -5979,6 +6009,8 @@
       StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
       if (VM_Version::supports_vaes() &&  VM_Version::supports_avx512vl() && VM_Version::supports_avx512dq() ) {
         StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptVectorAESCrypt();
+        StubRoutines::_electronicCodeBook_encryptAESCrypt = generate_electronicCodeBook_encryptAESCrypt();
+        StubRoutines::_electronicCodeBook_decryptAESCrypt = generate_electronicCodeBook_decryptAESCrypt();
       } else {
         StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
       }
--- a/src/hotspot/share/aot/aotCodeHeap.cpp	Fri Aug 16 14:46:52 2019 -0400
+++ b/src/hotspot/share/aot/aotCodeHeap.cpp	Fri Aug 16 14:42:50 2019 -0700
@@ -532,6 +532,8 @@
     SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_stub_routines_aescrypt_decryptBlock", address, StubRoutines::_aescrypt_decryptBlock);
     SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_stub_routines_cipherBlockChaining_encryptAESCrypt", address, StubRoutines::_cipherBlockChaining_encryptAESCrypt);
     SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_stub_routines_cipherBlockChaining_decryptAESCrypt", address, StubRoutines::_cipherBlockChaining_decryptAESCrypt);
+    SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_stub_routines_electronicCodeBook_encryptAESCrypt", address, StubRoutines::_electronicCodeBook_encryptAESCrypt);
+    SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_stub_routines_electronicCodeBook_decryptAESCrypt", address, StubRoutines::_electronicCodeBook_decryptAESCrypt);
     SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_stub_routines_update_bytes_crc32", address, StubRoutines::_updateBytesCRC32);
     SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_stub_routines_crc_table_adr", address, StubRoutines::_crc_table_adr);
 
--- a/src/hotspot/share/classfile/vmSymbols.cpp	Fri Aug 16 14:46:52 2019 -0400
+++ b/src/hotspot/share/classfile/vmSymbols.cpp	Fri Aug 16 14:42:50 2019 -0700
@@ -463,6 +463,8 @@
   switch (id) {
   case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
   case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
+  case vmIntrinsics::_electronicCodeBook_encryptAESCrypt:
+  case vmIntrinsics::_electronicCodeBook_decryptAESCrypt:
   case vmIntrinsics::_counterMode_AESCrypt:
     return 1;
   case vmIntrinsics::_digestBase_implCompressMB:
@@ -736,6 +738,10 @@
   case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
     if (!UseAESIntrinsics) return true;
     break;
+  case vmIntrinsics::_electronicCodeBook_encryptAESCrypt:
+  case vmIntrinsics::_electronicCodeBook_decryptAESCrypt:
+    if (!UseAESIntrinsics) return true;
+    break;
   case vmIntrinsics::_counterMode_AESCrypt:
     if (!UseAESCTRIntrinsics) return true;
     break;
--- a/src/hotspot/share/classfile/vmSymbols.hpp	Fri Aug 16 14:46:52 2019 -0400
+++ b/src/hotspot/share/classfile/vmSymbols.hpp	Fri Aug 16 14:42:50 2019 -0700
@@ -1020,6 +1020,12 @@
    do_name(     decrypt_name,                                      "implDecrypt")                                       \
    do_signature(byteArray_int_int_byteArray_int_signature,         "([BII[BI)I")                                        \
                                                                                                                         \
+  do_class(com_sun_crypto_provider_electronicCodeBook, "com/sun/crypto/provider/ElectronicCodeBook")                    \
+   do_intrinsic(_electronicCodeBook_encryptAESCrypt, com_sun_crypto_provider_electronicCodeBook, ecb_encrypt_name, byteArray_int_int_byteArray_int_signature, F_R)  \
+   do_intrinsic(_electronicCodeBook_decryptAESCrypt, com_sun_crypto_provider_electronicCodeBook, ecb_decrypt_name, byteArray_int_int_byteArray_int_signature, F_R)  \
+   do_name(ecb_encrypt_name, "implECBEncrypt")                                                                          \
+   do_name(ecb_decrypt_name, "implECBDecrypt")                                                                          \
+                                                                                                                        \
   do_class(com_sun_crypto_provider_counterMode,      "com/sun/crypto/provider/CounterMode")                             \
    do_intrinsic(_counterMode_AESCrypt, com_sun_crypto_provider_counterMode, crypt_name, byteArray_int_int_byteArray_int_signature, F_R)   \
    do_name(     crypt_name,                                 "implCrypt")                                                    \
--- a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp	Fri Aug 16 14:46:52 2019 -0400
+++ b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp	Fri Aug 16 14:42:50 2019 -0700
@@ -301,6 +301,8 @@
   static_field(StubRoutines,                _aescrypt_decryptBlock,                           address)                               \
   static_field(StubRoutines,                _cipherBlockChaining_encryptAESCrypt,             address)                               \
   static_field(StubRoutines,                _cipherBlockChaining_decryptAESCrypt,             address)                               \
+  static_field(StubRoutines,                _electronicCodeBook_encryptAESCrypt,              address)                               \
+  static_field(StubRoutines,                _electronicCodeBook_decryptAESCrypt,              address)                               \
   static_field(StubRoutines,                _counterMode_AESCrypt,                            address)                               \
   static_field(StubRoutines,                _base64_encodeBlock,                              address)                               \
   static_field(StubRoutines,                _ghash_processBlocks,                             address)                               \
--- a/src/hotspot/share/opto/c2compiler.cpp	Fri Aug 16 14:46:52 2019 -0400
+++ b/src/hotspot/share/opto/c2compiler.cpp	Fri Aug 16 14:42:50 2019 -0700
@@ -602,6 +602,8 @@
   case vmIntrinsics::_aescrypt_decryptBlock:
   case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
   case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
+  case vmIntrinsics::_electronicCodeBook_encryptAESCrypt:
+  case vmIntrinsics::_electronicCodeBook_decryptAESCrypt:
   case vmIntrinsics::_counterMode_AESCrypt:
   case vmIntrinsics::_sha_implCompress:
   case vmIntrinsics::_sha2_implCompress:
--- a/src/hotspot/share/opto/escape.cpp	Fri Aug 16 14:46:52 2019 -0400
+++ b/src/hotspot/share/opto/escape.cpp	Fri Aug 16 14:42:50 2019 -0700
@@ -990,6 +990,8 @@
                   strcmp(call->as_CallLeaf()->_name, "aescrypt_decryptBlock") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_encryptAESCrypt") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_decryptAESCrypt") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "electronicCodeBook_encryptAESCrypt") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "electronicCodeBook_decryptAESCrypt") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "counterMode_AESCrypt") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "ghash_processBlocks") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "encodeBlock") == 0 ||
--- a/src/hotspot/share/opto/library_call.cpp	Fri Aug 16 14:46:52 2019 -0400
+++ b/src/hotspot/share/opto/library_call.cpp	Fri Aug 16 14:42:50 2019 -0700
@@ -293,8 +293,10 @@
   bool inline_Class_cast();
   bool inline_aescrypt_Block(vmIntrinsics::ID id);
   bool inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id);
+  bool inline_electronicCodeBook_AESCrypt(vmIntrinsics::ID id);
   bool inline_counterMode_AESCrypt(vmIntrinsics::ID id);
   Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting);
+  Node* inline_electronicCodeBook_AESCrypt_predicate(bool decrypting);
   Node* inline_counterMode_AESCrypt_predicate();
   Node* get_key_start_from_aescrypt_object(Node* aescrypt_object);
   Node* get_original_key_start_from_aescrypt_object(Node* aescrypt_object);
@@ -807,6 +809,10 @@
   case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
     return inline_cipherBlockChaining_AESCrypt(intrinsic_id());
 
+  case vmIntrinsics::_electronicCodeBook_encryptAESCrypt:
+  case vmIntrinsics::_electronicCodeBook_decryptAESCrypt:
+    return inline_electronicCodeBook_AESCrypt(intrinsic_id());
+
   case vmIntrinsics::_counterMode_AESCrypt:
     return inline_counterMode_AESCrypt(intrinsic_id());
 
@@ -912,6 +918,10 @@
     return inline_cipherBlockChaining_AESCrypt_predicate(false);
   case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
     return inline_cipherBlockChaining_AESCrypt_predicate(true);
+  case vmIntrinsics::_electronicCodeBook_encryptAESCrypt:
+    return inline_electronicCodeBook_AESCrypt_predicate(false);
+  case vmIntrinsics::_electronicCodeBook_decryptAESCrypt:
+    return inline_electronicCodeBook_AESCrypt_predicate(true);
   case vmIntrinsics::_counterMode_AESCrypt:
     return inline_counterMode_AESCrypt_predicate();
   case vmIntrinsics::_digestBase_implCompressMB:
@@ -6019,6 +6029,94 @@
   return true;
 }
 
+//------------------------------inline_electronicCodeBook_AESCrypt-----------------------
+bool LibraryCallKit::inline_electronicCodeBook_AESCrypt(vmIntrinsics::ID id) {
+  address stubAddr = NULL;
+  const char *stubName = NULL;
+
+  assert(UseAES, "need AES instruction support");
+
+  switch (id) {
+  case vmIntrinsics::_electronicCodeBook_encryptAESCrypt:
+    stubAddr = StubRoutines::electronicCodeBook_encryptAESCrypt();
+    stubName = "electronicCodeBook_encryptAESCrypt";
+    break;
+  case vmIntrinsics::_electronicCodeBook_decryptAESCrypt:
+    stubAddr = StubRoutines::electronicCodeBook_decryptAESCrypt();
+    stubName = "electronicCodeBook_decryptAESCrypt";
+    break;
+  default:
+    break;
+  }
+
+  if (stubAddr == NULL) return false;  // no stub address for this id: do not intrinsify
+
+  Node* electronicCodeBook_object = argument(0);
+  Node* src                       = argument(1);
+  Node* src_offset                = argument(2);
+  Node* len                       = argument(3);
+  Node* dest                      = argument(4);
+  Node* dest_offset               = argument(5);
+
+  // src and dest are expected to be arrays (asserted below).
+  const Type* src_type = src->Value(&_gvn);
+  const Type* dest_type = dest->Value(&_gvn);
+  const TypeAryPtr* top_src = src_type->isa_aryptr();
+  const TypeAryPtr* top_dest = dest_type->isa_aryptr();
+  assert(top_src != NULL && top_src->klass() != NULL
+         &&  top_dest != NULL && top_dest->klass() != NULL, "args are strange");
+
+  // checks are the responsibility of the caller; here we only compute the element addresses
+  Node* src_start = src;
+  Node* dest_start = dest;
+  if (src_offset != NULL || dest_offset != NULL) {
+    assert(src_offset != NULL && dest_offset != NULL, "");
+    src_start = array_element_address(src, src_offset, T_BYTE);
+    dest_start = array_element_address(dest, dest_offset, T_BYTE);
+  }
+
+  // if we are in this set of code, we "know" the embeddedCipher is an AESCrypt object
+  // (because of the predicated logic executed earlier).
+  // so we cast it here safely.
+  // this requires a newer class file that has this array as littleEndian ints, otherwise we revert to java
+
+  Node* embeddedCipherObj = load_field_from_object(electronicCodeBook_object, "embeddedCipher", "Lcom/sun/crypto/provider/SymmetricCipher;", /*is_exact*/ false);
+  if (embeddedCipherObj == NULL) return false;
+
+  // cast it to what we know it will be at runtime
+  const TypeInstPtr* tinst = _gvn.type(electronicCodeBook_object)->isa_instptr();
+  assert(tinst != NULL, "ECB obj is null");
+  assert(tinst->klass()->is_loaded(), "ECB obj is not loaded");
+  ciKlass* klass_AESCrypt = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make("com/sun/crypto/provider/AESCrypt"));
+  assert(klass_AESCrypt->is_loaded(), "predicate checks that this class is loaded");
+
+  ciInstanceKlass* instklass_AESCrypt = klass_AESCrypt->as_instance_klass();
+  const TypeKlassPtr* aklass = TypeKlassPtr::make(instklass_AESCrypt);
+  const TypeOopPtr* xtype = aklass->as_instance_type();
+  Node* aescrypt_object = new CheckCastPPNode(control(), embeddedCipherObj, xtype);
+  aescrypt_object = _gvn.transform(aescrypt_object);
+
+  // we need to get the start of the aescrypt_object's expanded key array
+  Node* k_start = get_key_start_from_aescrypt_object(aescrypt_object);
+  if (k_start == NULL) return false;
+
+  Node* ecbCrypt;
+  if (Matcher::pass_original_key_for_aes()) {
+    // no SPARC version for AES/ECB intrinsics now.
+    return false;
+  }
+  // Call the stub, passing src_start, dest_start, k_start and len (no r_start argument is passed here)
+  ecbCrypt = make_runtime_call(RC_LEAF | RC_NO_FP,
+                               OptoRuntime::electronicCodeBook_aescrypt_Type(),
+                               stubAddr, stubName, TypePtr::BOTTOM,
+                               src_start, dest_start, k_start, len);
+
+  // return cipher length (int)
+  Node* retvalue = _gvn.transform(new ProjNode(ecbCrypt, TypeFunc::Parms));
+  set_result(retvalue);
+  return true;
+}
+
 //------------------------------inline_counterMode_AESCrypt-----------------------
 bool LibraryCallKit::inline_counterMode_AESCrypt(vmIntrinsics::ID id) {
   assert(UseAES, "need AES instruction support");
@@ -6215,6 +6313,65 @@
   return _gvn.transform(region);
 }
 
+//----------------------------inline_electronicCodeBook_AESCrypt_predicate----------------------------
+// Return node representing slow path of predicate check.
+// the pseudo code we want to emulate with this predicate is:
+// for encryption:
+//    if (embeddedCipherObj instanceof AESCrypt) do_intrinsic, else do_javapath
+// for decryption:
+//    if ((embeddedCipherObj instanceof AESCrypt) && (cipher!=plain)) do_intrinsic, else do_javapath
+//    note cipher==plain is more conservative than the original java code but that's OK
+//
+Node* LibraryCallKit::inline_electronicCodeBook_AESCrypt_predicate(bool decrypting) {
+  // The receiver was checked for NULL already.
+  Node* objECB = argument(0);
+
+  // Load embeddedCipher field of ElectronicCodeBook object.
+  Node* embeddedCipherObj = load_field_from_object(objECB, "embeddedCipher", "Lcom/sun/crypto/provider/SymmetricCipher;", /*is_exact*/ false);
+
+  // get AESCrypt klass for instanceOf check
+  // AESCrypt might not be loaded yet if some other SymmetricCipher got us to this compile point
+  // will have same classloader as ElectronicCodeBook object
+  const TypeInstPtr* tinst = _gvn.type(objECB)->isa_instptr();
+  assert(tinst != NULL, "ECBobj is null");
+  assert(tinst->klass()->is_loaded(), "ECBobj is not loaded");
+
+  // we want to do an instanceof comparison against the AESCrypt class
+  ciKlass* klass_AESCrypt = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make("com/sun/crypto/provider/AESCrypt"));
+  if (!klass_AESCrypt->is_loaded()) {
+    // if AESCrypt is not even loaded, we never take the intrinsic fast path
+    Node* ctrl = control();
+    set_control(top()); // no regular fast path
+    return ctrl;
+  }
+  ciInstanceKlass* instklass_AESCrypt = klass_AESCrypt->as_instance_klass();
+
+  Node* instof = gen_instanceof(embeddedCipherObj, makecon(TypeKlassPtr::make(instklass_AESCrypt)));
+  Node* cmp_instof = _gvn.transform(new CmpINode(instof, intcon(1)));
+  Node* bool_instof = _gvn.transform(new BoolNode(cmp_instof, BoolTest::ne));
+
+  Node* instof_false = generate_guard(bool_instof, NULL, PROB_MIN);  // guard on "instanceof result != 1"
+
+  // for encryption, we are done
+  if (!decrypting)
+    return instof_false;  // even if it is NULL
+
+  // for decryption, we need to add a further check to avoid
+  // taking the intrinsic path when cipher and plain are the same (src == dest pointer compare below)
+  // see the original java code for why.
+  RegionNode* region = new RegionNode(3);  // inputs 1 and 2 receive the two slow-path controls
+  region->init_req(1, instof_false);
+  Node* src = argument(1);
+  Node* dest = argument(4);
+  Node* cmp_src_dest = _gvn.transform(new CmpPNode(src, dest));
+  Node* bool_src_dest = _gvn.transform(new BoolNode(cmp_src_dest, BoolTest::eq));
+  Node* src_dest_conjoint = generate_guard(bool_src_dest, NULL, PROB_MIN);
+  region->init_req(2, src_dest_conjoint);
+
+  record_for_igvn(region);
+  return _gvn.transform(region);
+}
+
 //----------------------------inline_counterMode_AESCrypt_predicate----------------------------
 // Return node representing slow path of predicate check.
 // the pseudo code we want to emulate with this predicate is:
--- a/src/hotspot/share/opto/runtime.cpp	Fri Aug 16 14:46:52 2019 -0400
+++ b/src/hotspot/share/opto/runtime.cpp	Fri Aug 16 14:42:50 2019 -0700
@@ -900,6 +900,33 @@
   return TypeFunc::make(domain, range);
 }
 
+// for electronicCodeBook calls of aescrypt encrypt/decrypt, three pointers and a length (plus the original k array when Matcher::pass_original_key_for_aes()), returning int
+const TypeFunc* OptoRuntime::electronicCodeBook_aescrypt_Type() {
+  // create input type (domain)
+  int num_args = 4;  // src, dest, k array, src len
+  if (Matcher::pass_original_key_for_aes()) {
+     num_args = 5;
+  }
+  int argcnt = num_args;
+  const Type** fields = TypeTuple::fields(argcnt);
+  int argp = TypeFunc::Parms;
+  fields[argp++] = TypePtr::NOTNULL;    // src
+  fields[argp++] = TypePtr::NOTNULL;    // dest
+  fields[argp++] = TypePtr::NOTNULL;    // k array
+  fields[argp++] = TypeInt::INT;        // src len
+  if (Matcher::pass_original_key_for_aes()) {
+     fields[argp++] = TypePtr::NOTNULL;    // original k array
+  }
+  assert(argp == TypeFunc::Parms + argcnt, "correct decoding");
+  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + argcnt, fields);
+
+  // returning cipher len (int)
+  fields = TypeTuple::fields(1);
+  fields[TypeFunc::Parms + 0] = TypeInt::INT;
+  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms + 1, fields);
+  return TypeFunc::make(domain, range);
+}
+
 //for counterMode calls of aescrypt encrypt/decrypt, four pointers and a length, returning int
 const TypeFunc* OptoRuntime::counterMode_aescrypt_Type() {
   // create input type (domain)
--- a/src/hotspot/share/opto/runtime.hpp	Fri Aug 16 14:46:52 2019 -0400
+++ b/src/hotspot/share/opto/runtime.hpp	Fri Aug 16 14:42:50 2019 -0700
@@ -275,6 +275,7 @@
 
   static const TypeFunc* aescrypt_block_Type();
   static const TypeFunc* cipherBlockChaining_aescrypt_Type();
+  static const TypeFunc* electronicCodeBook_aescrypt_Type();
   static const TypeFunc* counterMode_aescrypt_Type();
 
   static const TypeFunc* sha_implCompress_Type();
--- a/src/hotspot/share/runtime/stubRoutines.cpp	Fri Aug 16 14:46:52 2019 -0400
+++ b/src/hotspot/share/runtime/stubRoutines.cpp	Fri Aug 16 14:42:50 2019 -0700
@@ -129,6 +129,8 @@
 address StubRoutines::_aescrypt_decryptBlock               = NULL;
 address StubRoutines::_cipherBlockChaining_encryptAESCrypt = NULL;
 address StubRoutines::_cipherBlockChaining_decryptAESCrypt = NULL;
+address StubRoutines::_electronicCodeBook_encryptAESCrypt  = NULL;
+address StubRoutines::_electronicCodeBook_decryptAESCrypt  = NULL;
 address StubRoutines::_counterMode_AESCrypt                = NULL;
 address StubRoutines::_ghash_processBlocks                 = NULL;
 address StubRoutines::_base64_encodeBlock                  = NULL;
--- a/src/hotspot/share/runtime/stubRoutines.hpp	Fri Aug 16 14:46:52 2019 -0400
+++ b/src/hotspot/share/runtime/stubRoutines.hpp	Fri Aug 16 14:42:50 2019 -0700
@@ -210,6 +210,8 @@
   static address _aescrypt_decryptBlock;
   static address _cipherBlockChaining_encryptAESCrypt;
   static address _cipherBlockChaining_decryptAESCrypt;
+  static address _electronicCodeBook_encryptAESCrypt;
+  static address _electronicCodeBook_decryptAESCrypt;
   static address _counterMode_AESCrypt;
   static address _ghash_processBlocks;
   static address _base64_encodeBlock;
@@ -376,6 +378,8 @@
   static address aescrypt_decryptBlock()                { return _aescrypt_decryptBlock; }
   static address cipherBlockChaining_encryptAESCrypt()  { return _cipherBlockChaining_encryptAESCrypt; }
   static address cipherBlockChaining_decryptAESCrypt()  { return _cipherBlockChaining_decryptAESCrypt; }
+  static address electronicCodeBook_encryptAESCrypt()   { return _electronicCodeBook_encryptAESCrypt; }
+  static address electronicCodeBook_decryptAESCrypt()   { return _electronicCodeBook_decryptAESCrypt; }
   static address counterMode_AESCrypt()  { return _counterMode_AESCrypt; }
   static address ghash_processBlocks()   { return _ghash_processBlocks; }
   static address base64_encodeBlock()    { return _base64_encodeBlock; }
--- a/src/hotspot/share/runtime/vmStructs.cpp	Fri Aug 16 14:46:52 2019 -0400
+++ b/src/hotspot/share/runtime/vmStructs.cpp	Fri Aug 16 14:42:50 2019 -0700
@@ -592,6 +592,8 @@
      static_field(StubRoutines,                _aescrypt_decryptBlock,                        address)                               \
      static_field(StubRoutines,                _cipherBlockChaining_encryptAESCrypt,          address)                               \
      static_field(StubRoutines,                _cipherBlockChaining_decryptAESCrypt,          address)                               \
+     static_field(StubRoutines,                _electronicCodeBook_encryptAESCrypt,           address)                               \
+     static_field(StubRoutines,                _electronicCodeBook_decryptAESCrypt,           address)                               \
      static_field(StubRoutines,                _counterMode_AESCrypt,                         address)                               \
      static_field(StubRoutines,                _ghash_processBlocks,                          address)                               \
      static_field(StubRoutines,                _base64_encodeBlock,                           address)                               \
--- a/src/java.base/share/classes/com/sun/crypto/provider/ElectronicCodeBook.java	Fri Aug 16 14:46:52 2019 -0400
+++ b/src/java.base/share/classes/com/sun/crypto/provider/ElectronicCodeBook.java	Fri Aug 16 14:42:50 2019 -0700
@@ -28,6 +28,8 @@
 import java.security.InvalidKeyException;
 import java.security.ProviderException;
 import sun.security.util.ArrayUtil;
+import java.util.Objects;
+import jdk.internal.HotSpotIntrinsicCandidate;
 
 /**
  * This class represents ciphers in electronic codebook (ECB) mode.
@@ -95,6 +97,16 @@
         embeddedCipher.init(decrypting, algorithm, key);
     }
 
+    @HotSpotIntrinsicCandidate
+    private int implECBEncrypt(byte [] in, int inOff, int len, byte[] out, int outOff) {
+        for (int i = len; i >= blockSize; i -= blockSize) { // one blockSize-sized chunk per iteration
+            embeddedCipher.encryptBlock(in, inOff, out, outOff);
+            inOff += blockSize;
+            outOff += blockSize;
+        }
+        return len; // the length passed in is returned unchanged
+    }
+
     /**
      * Performs encryption operation.
      *
@@ -116,9 +128,13 @@
         ArrayUtil.blockSizeCheck(len, blockSize);
         ArrayUtil.nullAndBoundsCheck(in, inOff, len);
         ArrayUtil.nullAndBoundsCheck(out, outOff, len);
+        return implECBEncrypt(in, inOff, len, out, outOff);
+    }
 
+    @HotSpotIntrinsicCandidate
+    private int implECBDecrypt(byte [] in, int inOff, int len, byte[] out, int outOff) {
         for (int i = len; i >= blockSize; i -= blockSize) {
-            embeddedCipher.encryptBlock(in, inOff, out, outOff);
+            embeddedCipher.decryptBlock(in, inOff, out, outOff);
             inOff += blockSize;
             outOff += blockSize;
         }
@@ -146,12 +162,6 @@
         ArrayUtil.blockSizeCheck(len, blockSize);
         ArrayUtil.nullAndBoundsCheck(in, inOff, len);
         ArrayUtil.nullAndBoundsCheck(out, outOff, len);
-
-        for (int i = len; i >= blockSize; i -= blockSize) {
-            embeddedCipher.decryptBlock(in, inOff, out, outOff);
-            inOff += blockSize;
-            outOff += blockSize;
-        }
-        return len;
-    }
+        return implECBDecrypt(in, inOff, len, out, outOff);
+   }
 }
--- a/src/jdk.aot/share/classes/jdk.tools.jaotc.binformat/src/jdk/tools/jaotc/binformat/BinaryContainer.java	Fri Aug 16 14:46:52 2019 -0400
+++ b/src/jdk.aot/share/classes/jdk.tools.jaotc.binformat/src/jdk/tools/jaotc/binformat/BinaryContainer.java	Fri Aug 16 14:42:50 2019 -0700
@@ -203,6 +203,8 @@
         {"StubRoutines::_aescrypt_decryptBlock", "_aot_stub_routines_aescrypt_decryptBlock"},
         {"StubRoutines::_cipherBlockChaining_encryptAESCrypt", "_aot_stub_routines_cipherBlockChaining_encryptAESCrypt"},
         {"StubRoutines::_cipherBlockChaining_decryptAESCrypt", "_aot_stub_routines_cipherBlockChaining_decryptAESCrypt"},
+        {"StubRoutines::_electronicCodeBook_encryptAESCrypt", "_aot_stub_routines_electronicCodeBook_encryptAESCrypt"},
+        {"StubRoutines::_electronicCodeBook_decryptAESCrypt", "_aot_stub_routines_electronicCodeBook_decryptAESCrypt"},
         {"StubRoutines::_updateBytesCRC32", "_aot_stub_routines_update_bytes_crc32"},
         {"StubRoutines::_crc_table_adr", "_aot_stub_routines_crc_table_adr"},
 
--- a/test/micro/org/openjdk/bench/javax/crypto/full/AESBench.java	Fri Aug 16 14:46:52 2019 -0400
+++ b/test/micro/org/openjdk/bench/javax/crypto/full/AESBench.java	Fri Aug 16 14:42:50 2019 -0700
@@ -43,7 +43,7 @@
     @Param({"AES/ECB/NoPadding", "AES/ECB/PKCS5Padding", "AES/CBC/NoPadding", "AES/CBC/PKCS5Padding"})
     private String algorithm;
 
-    @Param({"128"})
+    @Param({"128", "192", "256"})
     private int keyLength;
 
     @Param({"" + 16 * 1024})