--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp Fri Oct 26 12:06:55 2012 -0700
@@ -1007,6 +1007,67 @@
emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
}
+void Assembler::aesdec(XMMRegister dst, Address src) { // memory-source form
+ assert(VM_Version::supports_aes(), "");
+ InstructionMark insn_mark(this);
+ simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); // dst is passed as both destination and first source
+ emit_byte(0xDE); // opcode byte
+ emit_operand(dst, src); // encode the memory operand
+}
+
+void Assembler::aesdec(XMMRegister dst, XMMRegister src) { // register-source form
+ assert(VM_Version::supports_aes(), "");
+ const int reg_bits = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+ emit_byte(0xDE); // opcode byte
+ emit_byte(reg_bits | 0xC0); // operand byte: 0xC0 combined with the register encoding
+}
+
+void Assembler::aesdeclast(XMMRegister dst, Address src) { // memory-source form
+ assert(VM_Version::supports_aes(), "");
+ InstructionMark insn_mark(this);
+ simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); // dst is passed as both destination and first source
+ emit_byte(0xDF); // opcode byte
+ emit_operand(dst, src); // encode the memory operand
+}
+
+void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) { // register-source form
+ assert(VM_Version::supports_aes(), "");
+ const int reg_bits = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+ emit_byte(0xDF); // opcode byte
+ emit_byte(reg_bits | 0xC0); // operand byte: 0xC0 combined with the register encoding
+}
+
+void Assembler::aesenc(XMMRegister dst, Address src) { // memory-source form
+ assert(VM_Version::supports_aes(), "");
+ InstructionMark insn_mark(this);
+ simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); // dst is passed as both destination and first source
+ emit_byte(0xDC); // opcode byte
+ emit_operand(dst, src); // encode the memory operand
+}
+
+void Assembler::aesenc(XMMRegister dst, XMMRegister src) { // register-source form
+ assert(VM_Version::supports_aes(), "");
+ const int reg_bits = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+ emit_byte(0xDC); // opcode byte
+ emit_byte(reg_bits | 0xC0); // operand byte: 0xC0 combined with the register encoding
+}
+
+void Assembler::aesenclast(XMMRegister dst, Address src) { // memory-source form
+ assert(VM_Version::supports_aes(), "");
+ InstructionMark insn_mark(this);
+ simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); // dst is passed as both destination and first source
+ emit_byte(0xDD); // opcode byte
+ emit_operand(dst, src); // encode the memory operand
+}
+
+void Assembler::aesenclast(XMMRegister dst, XMMRegister src) { // register-source form
+ assert(VM_Version::supports_aes(), "");
+ const int reg_bits = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+ emit_byte(0xDD); // opcode byte
+ emit_byte(reg_bits | 0xC0); // operand byte: 0xC0 combined with the register encoding
+}
+
+
void Assembler::andl(Address dst, int32_t imm32) {
InstructionMark im(this);
prefix(dst);
@@ -2307,6 +2368,22 @@
a_byte(p);
}
+void Assembler::pshufb(XMMRegister dst, XMMRegister src) { // register-source form
+ assert(VM_Version::supports_ssse3(), "");
+ const int reg_bits = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+ emit_byte(0x00); // opcode byte
+ emit_byte(reg_bits | 0xC0); // operand byte: 0xC0 combined with the register encoding
+}
+
+void Assembler::pshufb(XMMRegister dst, Address src) { // memory-source form
+ assert(VM_Version::supports_ssse3(), "");
+ assert(UseAVX > 0, "SSE mode requires address alignment 16 bytes"); // alignment of src cannot be checked here, so non-AVX use is rejected outright
+ InstructionMark insn_mark(this);
+ simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); // dst is passed as both destination and first source
+ emit_byte(0x00); // opcode byte
+ emit_operand(dst, src); // encode the memory operand
+}
+
void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
assert(isByte(mode), "invalid value");
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
@@ -8067,6 +8144,15 @@
LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}
+void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src) { // load 16 bytes from a literal address
+ if (!reachable(src)) {
+ lea(rscratch1, src); // literal cannot be addressed directly: go through rscratch1
+ Assembler::movdqu(dst, Address(rscratch1, 0));
+ } else {
+ Assembler::movdqu(dst, as_Address(src));
+ }
+}
+
void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
if (reachable(src)) {
Assembler::movsd(dst, as_Address(src));
@@ -8357,6 +8443,17 @@
}
}
+void MacroAssembler::pshufb(XMMRegister dst, AddressLiteral src) {
+ // Unless AVX is in use, the literal's target must be 16-byte aligned.
+ assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
+ if (!reachable(src)) {
+ lea(rscratch1, src); // literal cannot be addressed directly: go through rscratch1
+ Assembler::pshufb(dst, Address(rscratch1, 0));
+ } else {
+ Assembler::pshufb(dst, as_Address(src));
+ }
+}
+
// AVX 3-operands instructions
void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
--- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp Fri Oct 26 12:06:55 2012 -0700
@@ -875,6 +875,17 @@
void addss(XMMRegister dst, Address src);
void addss(XMMRegister dst, XMMRegister src);
+ // AES instructions (dst is both destination and first source; each asserts VM_Version::supports_aes())
+ void aesdec(XMMRegister dst, Address src); // opcode byte 0xDE, memory source
+ void aesdec(XMMRegister dst, XMMRegister src); // opcode byte 0xDE, register source
+ void aesdeclast(XMMRegister dst, Address src); // opcode byte 0xDF, memory source
+ void aesdeclast(XMMRegister dst, XMMRegister src); // opcode byte 0xDF, register source
+ void aesenc(XMMRegister dst, Address src); // opcode byte 0xDC, memory source
+ void aesenc(XMMRegister dst, XMMRegister src); // opcode byte 0xDC, register source
+ void aesenclast(XMMRegister dst, Address src); // opcode byte 0xDD, memory source
+ void aesenclast(XMMRegister dst, XMMRegister src); // opcode byte 0xDD, register source
+
+
void andl(Address dst, int32_t imm32);
void andl(Register dst, int32_t imm32);
void andl(Register dst, Address src);
@@ -1424,6 +1435,10 @@
void prefetcht2(Address src);
void prefetchw(Address src);
+ // Shuffle Bytes (both forms assert VM_Version::supports_ssse3())
+ void pshufb(XMMRegister dst, XMMRegister src);
+ void pshufb(XMMRegister dst, Address src); // additionally asserts UseAVX > 0
+
// Shuffle Packed Doublewords
void pshufd(XMMRegister dst, XMMRegister src, int mode);
void pshufd(XMMRegister dst, Address src, int mode);
@@ -2611,6 +2626,12 @@
void divss(XMMRegister dst, Address src) { Assembler::divss(dst, src); }
void divss(XMMRegister dst, AddressLiteral src);
+ // Move Unaligned Double Quadword
+ void movdqu(Address dst, XMMRegister src) { Assembler::movdqu(dst, src); } // forwards to Assembler unchanged
+ void movdqu(XMMRegister dst, Address src) { Assembler::movdqu(dst, src); } // forwards to Assembler unchanged
+ void movdqu(XMMRegister dst, XMMRegister src) { Assembler::movdqu(dst, src); } // forwards to Assembler unchanged
+ void movdqu(XMMRegister dst, AddressLiteral src); // goes through rscratch1 when src is not reachable
+
void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); }
void movsd(Address dst, XMMRegister src) { Assembler::movsd(dst, src); }
void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); }
@@ -2658,6 +2679,10 @@
void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); }
void xorps(XMMRegister dst, AddressLiteral src);
+ // Shuffle Bytes
+ void pshufb(XMMRegister dst, XMMRegister src) { Assembler::pshufb(dst, src); } // forwards to Assembler unchanged
+ void pshufb(XMMRegister dst, Address src) { Assembler::pshufb(dst, src); } // forwards to Assembler unchanged
+ void pshufb(XMMRegister dst, AddressLiteral src); // goes through rscratch1 when src is not reachable
// AVX 3-operands instructions
void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddsd(dst, nds, src); }
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp Fri Oct 26 12:06:55 2012 -0700
@@ -2137,6 +2137,529 @@
}
}
+ // AES intrinsic stubs
+ enum {AESBlockSize = 16}; // bytes per block; the CBC loops advance pos and len_reg by this amount
+
+ address generate_key_shuffle_mask() { // emits the 16-byte pshufb mask used by load_key; returns its address
+ __ align(16);
+ StubCodeMark stub_mark(this, "StubRoutines", "key_shuffle_mask");
+ address mask_addr = __ pc();
+ __ emit_data(0x00010203, relocInfo::none, 0); // four 32-bit words, lowest address first
+ __ emit_data(0x04050607, relocInfo::none, 0);
+ __ emit_data(0x08090A0B, relocInfo::none, 0);
+ __ emit_data(0x0C0D0E0F, relocInfo::none, 0);
+ return mask_addr;
+ }
+
+ // Loads the 16 bytes at key+offset into xmmdst (unaligned load) and shuffles them with the key shuffle mask.
+ // xmm_shuf_mask: register already holding the mask, or xnoreg (default) to read the mask from its stub address.
+ void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=xnoreg) {
+ __ movdqu(xmmdst, Address(key, offset));
+ if (xmm_shuf_mask != xnoreg) { // xnoreg rather than NULL: NULL is the same value as xmm0, which would make xmm0 unusable as the mask
+ __ pshufb(xmmdst, xmm_shuf_mask);
+ } else {
+ __ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+ }
+ }
+
+ // aesenc of xmmdst with the key at key+offset; xmmtmp is overwritten with the loaded key
+ // xmm_shuf_mask is forwarded to load_key (xnoreg = mask not in a register)
+ void aes_enc_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=xnoreg) {
+ load_key(xmmtmp, key, offset, xmm_shuf_mask);
+ __ aesenc(xmmdst, xmmtmp);
+ }
+
+ // aesdec of xmmdst with the key at key+offset; xmmtmp is overwritten with the loaded key
+ // xmm_shuf_mask is forwarded to load_key (xnoreg = mask not in a register)
+ void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=xnoreg) {
+ load_key(xmmtmp, key, offset, xmm_shuf_mask);
+ __ aesdec(xmmdst, xmmtmp);
+ }
+
+
+ // Arguments:
+ //
+ // Inputs:
+ // arg 0 (stack, read from rbp+8) - source byte array address
+ // arg 1 (stack, read from rbp+12) - destination byte array address
+ // arg 2 (stack, read from rbp+16) - K (key) in little endian int array
+ //
+ address generate_aescrypt_encryptBlock() { // emits the single-block AES encrypt stub; returns its entry address
+ assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
+ Label L_doLast;
+ address start = __ pc();
+
+ const Register from = rsi; // source array address
+ const Register to = rdx; // destination array address
+ const Register key = rcx; // key array address
+ const Register keylen = rax; // number of 128-bit key words beyond the minimum 11
+ const Address from_param(rbp, 8+0); // incoming arguments, addressed off rbp after enter()
+ const Address to_param (rbp, 8+4);
+ const Address key_param (rbp, 8+8);
+
+ const XMMRegister xmm_result = xmm0; // the block being encrypted
+ const XMMRegister xmm_temp = xmm1; // current key word
+ const XMMRegister xmm_key_shuf_mask = xmm2; // key shuffle mask, loaded once below
+
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
+ __ push(rsi); // rsi is used as 'from'; restored by the pop before leave()
+ __ movptr(from , from_param);
+ __ movptr(to , to_param);
+ __ movptr(key , key_param);
+
+ __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); // key points at element 0; this offset reaches back to the array length field
+ // keylen = # of 32-bit words, convert to 128-bit words
+ __ shrl(keylen, 2);
+ __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more
+
+ __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+ __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input
+
+ // For encryption, the java expanded key ordering is just what we need
+
+ load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
+ __ pxor(xmm_result, xmm_temp); // xor the input with the key at offset 0
+ for (int offset = 0x10; offset <= 0x90; offset += 0x10) { // C++ loop: emits nine aesenc steps used by every key size
+ aes_enc_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
+ }
+ load_key (xmm_temp, key, 0xa0, xmm_key_shuf_mask);
+ __ cmpl(keylen, 0);
+ __ jcc(Assembler::equal, L_doLast); // no extra key words: 0xa0 is the key for aesenclast
+ __ aesenc(xmm_result, xmm_temp); // only in 192 and 256 bit keys
+ aes_enc_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
+ load_key(xmm_temp, key, 0xc0, xmm_key_shuf_mask);
+ __ subl(keylen, 2);
+ __ jcc(Assembler::equal, L_doLast); // two extra key words: 0xc0 is the key for aesenclast
+ __ aesenc(xmm_result, xmm_temp); // only in 256 bit keys
+ aes_enc_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
+ load_key(xmm_temp, key, 0xe0, xmm_key_shuf_mask);
+
+ __ BIND(L_doLast);
+ __ aesenclast(xmm_result, xmm_temp); // xmm_temp holds the last key word loaded on whichever path got here
+ __ movdqu(Address(to, 0), xmm_result); // store the result
+ __ xorptr(rax, rax); // return 0
+ __ pop(rsi);
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
+ __ ret(0);
+
+ return start;
+ }
+
+
+ // Arguments:
+ //
+ // Inputs:
+ // arg 0 (stack, read from rbp+8) - source byte array address
+ // arg 1 (stack, read from rbp+12) - destination byte array address
+ // arg 2 (stack, read from rbp+16) - K (key) in little endian int array
+ //
+ address generate_aescrypt_decryptBlock() { // emits the single-block AES decrypt stub; returns its entry address
+ assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
+ Label L_doLast;
+ address start = __ pc();
+
+ const Register from = rsi; // source array address
+ const Register to = rdx; // destination array address
+ const Register key = rcx; // key array address
+ const Register keylen = rax; // number of 128-bit key words beyond the minimum 11
+ const Address from_param(rbp, 8+0); // incoming arguments, addressed off rbp after enter()
+ const Address to_param (rbp, 8+4);
+ const Address key_param (rbp, 8+8);
+
+ const XMMRegister xmm_result = xmm0; // the block being decrypted
+ const XMMRegister xmm_temp = xmm1; // current key word
+ const XMMRegister xmm_key_shuf_mask = xmm2; // key shuffle mask, loaded once below
+
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
+ __ push(rsi); // rsi is used as 'from'; restored by the pop before leave()
+ __ movptr(from , from_param);
+ __ movptr(to , to_param);
+ __ movptr(key , key_param);
+
+ __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); // key points at element 0; this offset reaches back to the array length field
+ // keylen = # of 32-bit words, convert to 128-bit words
+ __ shrl(keylen, 2);
+ __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more
+
+ __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+ __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input
+
+ // for decryption java expanded key ordering is rotated one position from what we want
+ // so we start from 0x10 here and hit 0x00 last
+ // we don't know if the key is aligned, hence not using load-execute form
+ load_key(xmm_temp, key, 0x10, xmm_key_shuf_mask);
+ __ pxor (xmm_result, xmm_temp); // xor the input with the key at offset 0x10
+ for (int offset = 0x20; offset <= 0xa0; offset += 0x10) { // C++ loop: emits nine aesdec steps used by every key size
+ aes_dec_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
+ }
+ __ cmpl(keylen, 0);
+ __ jcc(Assembler::equal, L_doLast); // no extra key words: go straight to aesdeclast
+ // only in 192 and 256 bit keys
+ aes_dec_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
+ aes_dec_key(xmm_result, xmm_temp, key, 0xc0, xmm_key_shuf_mask);
+ __ subl(keylen, 2);
+ __ jcc(Assembler::equal, L_doLast); // two extra key words: done with aesdec steps
+ // only in 256 bit keys
+ aes_dec_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
+ aes_dec_key(xmm_result, xmm_temp, key, 0xe0, xmm_key_shuf_mask);
+
+ __ BIND(L_doLast);
+ // for decryption the aesdeclast operation is always on key+0x00
+ load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
+ __ aesdeclast(xmm_result, xmm_temp);
+
+ __ movdqu(Address(to, 0), xmm_result); // store the result
+
+ __ xorptr(rax, rax); // return 0
+ __ pop(rsi);
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
+ __ ret(0);
+
+ return start;
+ }
+
+ void handleSOERegisters(bool saving) { // save (true) or reload (false) rbx/rsi/rdi in slots just below rbp
+ const int frame_bytes = 4 * wordSize; // four words reserved; three are used below
+ const Address rbx_slot(rbp, -3 * wordSize);
+ const Address rsi_slot(rbp, -2 * wordSize);
+ const Address rdi_slot(rbp, -1 * wordSize);
+
+ if (!saving) {
+ // restoring: rsp is not adjusted here
+ __ movptr(rsi, rsi_slot);
+ __ movptr(rdi, rdi_slot);
+ __ movptr(rbx, rbx_slot);
+ } else {
+ __ subptr(rsp, frame_bytes);
+ __ movptr(rsi_slot, rsi);
+ __ movptr(rdi_slot, rdi);
+ __ movptr(rbx_slot, rbx);
+ }
+ }
+
+ // Arguments:
+ //
+ // Inputs:
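+ // arg 0 (stack, read from rbp+8) - source byte array address
+ // arg 1 (stack, read from rbp+12) - destination byte array address
+ // arg 2 (stack, read from rbp+16) - K (key) in little endian int array
+ // arg 3 (stack, read from rbp+20) - r vector byte array address
+ // arg 4 (stack, read from rbp+24) - input length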
+ //
+ address generate_cipherBlockChaining_encryptAESCrypt() { // emits the AES/CBC encrypt stub; returns its entry address
+ assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
+ address start = __ pc();
+
+ Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256;
+ const Register from = rsi; // source array address
+ const Register to = rdx; // destination array address
+ const Register key = rcx; // key array address
+ const Register rvec = rdi; // r byte array initialized from initvector array address
+ // and left with the results of the last encryption block
+ const Register len_reg = rbx; // src len (must be multiple of blocksize 16)
+ const Register pos = rax; // byte offset of the current block in from/to; rax first carries the key length (see below)
+
+ // xmm register assignments for the loops below
+ const XMMRegister xmm_result = xmm0;
+ const XMMRegister xmm_temp = xmm1;
+ // first 6 keys preloaded into xmm2-xmm7
+ const int XMM_REG_NUM_KEY_FIRST = 2;
+ const int XMM_REG_NUM_KEY_LAST = 7;
+ const XMMRegister xmm_key0 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
+
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
+ handleSOERegisters(true /*saving*/);
+
+ // load registers from incoming parameters
+ const Address from_param(rbp, 8+0);
+ const Address to_param (rbp, 8+4);
+ const Address key_param (rbp, 8+8);
+ const Address rvec_param (rbp, 8+12);
+ const Address len_param (rbp, 8+16);
+ __ movptr(from , from_param);
+ __ movptr(to , to_param);
+ __ movptr(key , key_param);
+ __ movptr(rvec , rvec_param);
+ __ movptr(len_reg , len_param);
+
+ const XMMRegister xmm_key_shuf_mask = xmm_temp; // used temporarily to swap key bytes up front
+ __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+ // load up xmm regs 2 thru 7 with keys 0-5
+ for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+ load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
+ offset += 0x10;
+ }
+
+ __ movdqu(xmm_result, Address(rvec, 0x00)); // initialize xmm_result with r vec
+
+ // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
+ __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+ __ cmpl(rax, 44); // 44 ints selects the 128-bit path
+ __ jcc(Assembler::notEqual, L_key_192_256);
+
+ // 128 bit code follows here
+ __ movptr(pos, 0); // rax is reused as pos from here on
+ __ align(OptoLoopAlignment);
+ __ BIND(L_loopTop_128);
+ __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
+ __ pxor (xmm_result, xmm_temp); // xor with the current r vector
+
+ __ pxor (xmm_result, xmm_key0); // do the aes rounds
+ for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+ __ aesenc(xmm_result, as_XMMRegister(rnum));
+ }
+ for (int key_offset = 0x60; key_offset <= 0x90; key_offset += 0x10) {
+ aes_enc_key(xmm_result, xmm_temp, key, key_offset); // no mask register passed: xmm_temp no longer holds the mask
+ }
+ load_key(xmm_temp, key, 0xa0);
+ __ aesenclast(xmm_result, xmm_temp);
+
+ __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
+ // no need to store r to memory until we exit
+ __ addptr(pos, AESBlockSize);
+ __ subptr(len_reg, AESBlockSize);
+ __ jcc(Assembler::notEqual, L_loopTop_128); // NOTE(review): bottom-tested, so len_reg == 0 on entry would not stop here - confirm callers never pass 0
+
+ __ BIND(L_exit); // shared exit, also reached from the 192 and 256 paths
+ __ movdqu(Address(rvec, 0), xmm_result); // final value of r stored in rvec of CipherBlockChaining object
+
+ handleSOERegisters(false /*restoring*/);
+ __ movl(rax, 0); // return 0 (why?)
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
+ __ ret(0);
+
+ __ BIND(L_key_192_256);
+ // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
+ __ cmpl(rax, 52);
+ __ jcc(Assembler::notEqual, L_key_256);
+
+ // 192-bit code follows here (could be changed to use more xmm registers)
+ __ movptr(pos, 0); // rax is reused as pos from here on
+ __ align(OptoLoopAlignment);
+ __ BIND(L_loopTop_192);
+ __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
+ __ pxor (xmm_result, xmm_temp); // xor with the current r vector
+
+ __ pxor (xmm_result, xmm_key0); // do the aes rounds
+ for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+ __ aesenc(xmm_result, as_XMMRegister(rnum));
+ }
+ for (int key_offset = 0x60; key_offset <= 0xb0; key_offset += 0x10) {
+ aes_enc_key(xmm_result, xmm_temp, key, key_offset); // no mask register passed: xmm_temp no longer holds the mask
+ }
+ load_key(xmm_temp, key, 0xc0);
+ __ aesenclast(xmm_result, xmm_temp);
+
+ __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
+ // no need to store r to memory until we exit
+ __ addptr(pos, AESBlockSize);
+ __ subptr(len_reg, AESBlockSize);
+ __ jcc(Assembler::notEqual, L_loopTop_192);
+ __ jmp(L_exit);
+
+ __ BIND(L_key_256);
+ // 256-bit code follows here (could be changed to use more xmm registers)
+ __ movptr(pos, 0); // rax is reused as pos from here on
+ __ align(OptoLoopAlignment);
+ __ BIND(L_loopTop_256);
+ __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
+ __ pxor (xmm_result, xmm_temp); // xor with the current r vector
+
+ __ pxor (xmm_result, xmm_key0); // do the aes rounds
+ for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+ __ aesenc(xmm_result, as_XMMRegister(rnum));
+ }
+ for (int key_offset = 0x60; key_offset <= 0xd0; key_offset += 0x10) {
+ aes_enc_key(xmm_result, xmm_temp, key, key_offset); // no mask register passed: xmm_temp no longer holds the mask
+ }
+ load_key(xmm_temp, key, 0xe0);
+ __ aesenclast(xmm_result, xmm_temp);
+
+ __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
+ // no need to store r to memory until we exit
+ __ addptr(pos, AESBlockSize);
+ __ subptr(len_reg, AESBlockSize);
+ __ jcc(Assembler::notEqual, L_loopTop_256);
+ __ jmp(L_exit);
+
+ return start;
+ }
+
+
+ // CBC AES Decryption.
+ // In 32-bit stub, because of lack of registers we do not try to parallelize 4 blocks at a time.
+ //
+ // Arguments:
+ //
+ // Inputs:
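+ // arg 0 (stack, read from rbp+8) - source byte array address
+ // arg 1 (stack, read from rbp+12) - destination byte array address
+ // arg 2 (stack, read from rbp+16) - K (key) in little endian int array
+ // arg 3 (stack, read from rbp+20) - r vector byte array address
+ // arg 4 (stack, read from rbp+24) - input length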
+ //
+
+ address generate_cipherBlockChaining_decryptAESCrypt() { // emits the AES/CBC decrypt stub; returns its entry address
+ assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
+ address start = __ pc();
+
+ Label L_exit, L_key_192_256, L_key_256;
+ Label L_singleBlock_loopTop_128;
+ Label L_singleBlock_loopTop_192, L_singleBlock_loopTop_256;
+ const Register from = rsi; // source array address
+ const Register to = rdx; // destination array address
+ const Register key = rcx; // key array address
+ const Register rvec = rdi; // r byte array initialized from initvector array address
+ // and left with the last cipher block that was consumed (copied back at L_exit)
+ const Register len_reg = rbx; // src len (must be multiple of blocksize 16)
+ const Register pos = rax; // byte offset of the current block in from/to; rax first carries the key length (see below)
+
+ // xmm register assignments for the loops below
+ const XMMRegister xmm_result = xmm0;
+ const XMMRegister xmm_temp = xmm1;
+ // first 6 keys preloaded into xmm2-xmm7
+ const int XMM_REG_NUM_KEY_FIRST = 2;
+ const int XMM_REG_NUM_KEY_LAST = 7;
+ const int FIRST_NON_REG_KEY_offset = 0x70; // first key offset not preloaded (0x10-0x60 live in xmm2-xmm7)
+ const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
+
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
+ handleSOERegisters(true /*saving*/);
+
+ // load registers from incoming parameters
+ const Address from_param(rbp, 8+0);
+ const Address to_param (rbp, 8+4);
+ const Address key_param (rbp, 8+8);
+ const Address rvec_param (rbp, 8+12);
+ const Address len_param (rbp, 8+16);
+ __ movptr(from , from_param);
+ __ movptr(to , to_param);
+ __ movptr(key , key_param);
+ __ movptr(rvec , rvec_param);
+ __ movptr(len_reg , len_param);
+
+ // the java expanded key ordering is rotated one position from what we want
+ // so we start from 0x10 here and hit 0x00 last
+ const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front
+ __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+ // load up xmm regs 2 thru 7 with the six keys at offsets 0x10 thru 0x60
+ for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+ load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
+ offset += 0x10;
+ }
+
+ // inside here, use the rvec register to point to previous block cipher
+ // with which we xor at the end of each newly decrypted block
+ const Register prev_block_cipher_ptr = rvec;
+
+ // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
+ __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+ __ cmpl(rax, 44); // 44 ints selects the 128-bit path
+ __ jcc(Assembler::notEqual, L_key_192_256);
+
+
+ // 128-bit code follows here (one block per iteration; this 32-bit stub does not parallelize)
+ __ movptr(pos, 0); // rax is reused as pos from here on
+ __ align(OptoLoopAlignment);
+ __ BIND(L_singleBlock_loopTop_128);
+ __ cmpptr(len_reg, 0); // any blocks left??
+ __ jcc(Assembler::equal, L_exit);
+ __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
+ __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds
+ for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+ __ aesdec(xmm_result, as_XMMRegister(rnum));
+ }
+ for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xa0; key_offset += 0x10) { // 128-bit runs up to key offset a0
+ aes_dec_key(xmm_result, xmm_temp, key, key_offset); // no mask register passed: xmm_temp no longer holds the mask
+ }
+ load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0
+ __ aesdeclast(xmm_result, xmm_temp);
+ __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
+ __ pxor (xmm_result, xmm_temp); // xor with the current r vector
+ __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
+ // no need to store r to memory until we exit
+ __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr; NOTE(review): if from and to alias, the store above already overwrote this block - confirm callers never decrypt in place
+ __ addptr(pos, AESBlockSize);
+ __ subptr(len_reg, AESBlockSize);
+ __ jmp(L_singleBlock_loopTop_128); // loop is top-tested, so len_reg == 0 on entry exits immediately
+
+
+ __ BIND(L_exit); // shared exit, also reached from the 192 and 256 paths
+ __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00)); // last cipher block consumed (or the original r if no block was processed)
+ __ movptr(rvec , rvec_param); // restore this since used in loop
+ __ movdqu(Address(rvec, 0), xmm_temp); // final value of r stored in rvec of CipherBlockChaining object
+ handleSOERegisters(false /*restoring*/);
+ __ movl(rax, 0); // return 0 (why?)
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
+ __ ret(0);
+
+
+ __ BIND(L_key_192_256);
+ // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
+ __ cmpl(rax, 52);
+ __ jcc(Assembler::notEqual, L_key_256);
+
+ // 192-bit code follows here (could be optimized to use parallelism)
+ __ movptr(pos, 0); // rax is reused as pos from here on
+ __ align(OptoLoopAlignment);
+ __ BIND(L_singleBlock_loopTop_192);
+ __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
+ __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds
+ for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+ __ aesdec(xmm_result, as_XMMRegister(rnum));
+ }
+ for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xc0; key_offset += 0x10) { // 192-bit runs up to key offset c0
+ aes_dec_key(xmm_result, xmm_temp, key, key_offset); // no mask register passed: xmm_temp no longer holds the mask
+ }
+ load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0
+ __ aesdeclast(xmm_result, xmm_temp);
+ __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
+ __ pxor (xmm_result, xmm_temp); // xor with the current r vector
+ __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
+ // no need to store r to memory until we exit
+ __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr (same aliasing caveat as the 128-bit loop)
+ __ addptr(pos, AESBlockSize);
+ __ subptr(len_reg, AESBlockSize);
+ __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192); // NOTE(review): bottom-tested, unlike the 128-bit loop; len_reg == 0 on entry would not stop here - confirm
+ __ jmp(L_exit);
+
+ __ BIND(L_key_256);
+ // 256-bit code follows here (could be optimized to use parallelism)
+ __ movptr(pos, 0); // rax is reused as pos from here on
+ __ align(OptoLoopAlignment);
+ __ BIND(L_singleBlock_loopTop_256);
+ __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
+ __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds
+ for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+ __ aesdec(xmm_result, as_XMMRegister(rnum));
+ }
+ for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xe0; key_offset += 0x10) { // 256-bit runs up to key offset e0
+ aes_dec_key(xmm_result, xmm_temp, key, key_offset); // no mask register passed: xmm_temp no longer holds the mask
+ }
+ load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0
+ __ aesdeclast(xmm_result, xmm_temp);
+ __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
+ __ pxor (xmm_result, xmm_temp); // xor with the current r vector
+ __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
+ // no need to store r to memory until we exit
+ __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr (same aliasing caveat as the 128-bit loop)
+ __ addptr(pos, AESBlockSize);
+ __ subptr(len_reg, AESBlockSize);
+ __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256); // NOTE(review): bottom-tested, unlike the 128-bit loop; len_reg == 0 on entry would not stop here - confirm
+ __ jmp(L_exit);
+
+ return start;
+ }
+
+
public:
// Information about frame layout at time of blocking runtime call.
// Note that we only have to preserve callee-saved registers since
@@ -2332,6 +2855,16 @@
generate_arraycopy_stubs();
generate_math_stubs();
+
+ // don't bother generating these AES intrinsic stubs unless global flag is set
+ if (UseAESIntrinsics) {
+ StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // might be needed by the others
+
+ StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
+ StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
+ StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
+ StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
+ }
}
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp Fri Oct 26 12:06:55 2012 -0700
@@ -2941,6 +2941,548 @@
}
}
+ // AES intrinsic stubs
+ enum {AESBlockSize = 16}; // bytes per AES block; the CBC stubs advance pos and len by this amount per block
+
+ address generate_key_shuffle_mask() { // emits the 16-byte shuffle mask used by load_key and returns its address
+ __ align(16); // align the emitted data to 16 bytes
+ StubCodeMark mark(this, "StubRoutines", "key_shuffle_mask");
+ address start = __ pc(); // address of the first mask byte
+ __ emit_data64( 0x0405060700010203, relocInfo::none ); // little-endian bytes 03 02 01 00 07 06 05 04: reverses each 32-bit word
+ __ emit_data64( 0x0c0d0e0f08090a0b, relocInfo::none ); // little-endian bytes 0b 0a 09 08 0f 0e 0d 0c: same for the upper two words
+ return start;
+ }
+
+ // Utility routine: loads the 16 bytes at key+offset into xmmdst and byte-shuffles them with the key shuffle mask.
+ // The mask can be supplied in xmm_shuf_mask; when omitted it is read from key_shuffle_mask_addr().
+ void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
+ __ movdqu(xmmdst, Address(key, offset)); // unaligned 128-bit load
+ if (xmm_shuf_mask != NULL) { // NOTE(review): NULL is the "no mask register" sentinel; confirm no real XMMRegister (e.g. xmm0) compares equal to NULL
+ __ pshufb(xmmdst, xmm_shuf_mask);
+ } else {
+ __ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+ }
+ }
+
+ // Emits one aesenc round on xmmdst using the round key at key+offset; xmmtmp is overwritten with that key.
+ // The shuffle mask register is optional and is forwarded unchanged to load_key.
+ void aes_enc_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
+ load_key(xmmtmp, key, offset, xmm_shuf_mask);
+ __ aesenc(xmmdst, xmmtmp);
+ }
+
+ // Emits one aesdec round on xmmdst using the round key at key+offset; xmmtmp is overwritten with that key.
+ // The shuffle mask register is optional and is forwarded unchanged to load_key.
+ void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
+ load_key(xmmtmp, key, offset, xmm_shuf_mask);
+ __ aesdec(xmmdst, xmmtmp);
+ }
+
+
+ // Emits the stub that AES-encrypts a single 16-byte block; returns the stub entry address.
+ //
+ // Inputs:
+ // c_rarg0 - source byte array address (16 bytes are read)
+ // c_rarg1 - destination byte array address (16 bytes are written)
+ // c_rarg2 - K (key) in little endian int array; its length selects the number of rounds
+ //
+ address generate_aescrypt_encryptBlock() {
+ assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
+ Label L_doLast;
+ address start = __ pc();
+
+ const Register from = c_rarg0; // source array address
+ const Register to = c_rarg1; // destination array address
+ const Register key = c_rarg2; // key array address
+ const Register keylen = rax; // scratch; rax is zeroed again before returning
+
+ const XMMRegister xmm_result = xmm0;
+ const XMMRegister xmm_temp = xmm1;
+ const XMMRegister xmm_key_shuf_mask = xmm2;
+
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+ __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); // array length field, addressed relative to the element base held in key
+ // keylen = # of 32-bit words, convert to 128-bit words
+ __ shrl(keylen, 2);
+ __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more
+
+ __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+ __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input
+
+ // For encryption, the java expanded key ordering is just what we need
+ // we don't know if the key is aligned, hence not using load-execute form
+
+ load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
+ __ pxor(xmm_result, xmm_temp); // initial round-key xor
+ for (int offset = 0x10; offset <= 0x90; offset += 0x10) { // nine aesenc rounds emitted for every key size
+ aes_enc_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
+ }
+ load_key (xmm_temp, key, 0xa0, xmm_key_shuf_mask);
+ __ cmpl(keylen, 0); // zero means the array held exactly 11 128-bit words (44 ints)
+ __ jcc(Assembler::equal, L_doLast);
+ __ aesenc(xmm_result, xmm_temp); // only in 192 and 256 bit keys
+ aes_enc_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
+ load_key(xmm_temp, key, 0xc0, xmm_key_shuf_mask);
+ __ subl(keylen, 2); // zero now means 13 128-bit words (52 ints)
+ __ jcc(Assembler::equal, L_doLast);
+ __ aesenc(xmm_result, xmm_temp); // only in 256 bit keys
+ aes_enc_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
+ load_key(xmm_temp, key, 0xe0, xmm_key_shuf_mask);
+
+ __ BIND(L_doLast); // xmm_temp holds the final round key on every path reaching here
+ __ aesenclast(xmm_result, xmm_temp);
+ __ movdqu(Address(to, 0), xmm_result); // store the result
+ __ xorptr(rax, rax); // return 0
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
+ __ ret(0);
+
+ return start;
+ }
+
+
+ // Emits the stub that AES-decrypts a single 16-byte block; returns the stub entry address.
+ //
+ // Inputs:
+ // c_rarg0 - source byte array address (16 bytes are read)
+ // c_rarg1 - destination byte array address (16 bytes are written)
+ // c_rarg2 - K (key) in little endian int array; its length selects the number of rounds
+ //
+ address generate_aescrypt_decryptBlock() {
+ assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
+ Label L_doLast;
+ address start = __ pc();
+
+ const Register from = c_rarg0; // source array address
+ const Register to = c_rarg1; // destination array address
+ const Register key = c_rarg2; // key array address
+ const Register keylen = rax; // scratch; rax is zeroed again before returning
+
+ const XMMRegister xmm_result = xmm0;
+ const XMMRegister xmm_temp = xmm1;
+ const XMMRegister xmm_key_shuf_mask = xmm2;
+
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+ __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); // array length field, addressed relative to the element base held in key
+ // keylen = # of 32-bit words, convert to 128-bit words
+ __ shrl(keylen, 2);
+ __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more
+
+ __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+ __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input
+
+ // for decryption java expanded key ordering is rotated one position from what we want
+ // so we start from 0x10 here and hit 0x00 last
+ // we don't know if the key is aligned, hence not using load-execute form
+ load_key(xmm_temp, key, 0x10, xmm_key_shuf_mask);
+ __ pxor (xmm_result, xmm_temp); // initial round-key xor
+ for (int offset = 0x20; offset <= 0xa0; offset += 0x10) { // nine aesdec rounds emitted for every key size
+ aes_dec_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
+ }
+ __ cmpl(keylen, 0); // zero means the array held exactly 11 128-bit words (44 ints)
+ __ jcc(Assembler::equal, L_doLast);
+ // only in 192 and 256 bit keys
+ aes_dec_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
+ aes_dec_key(xmm_result, xmm_temp, key, 0xc0, xmm_key_shuf_mask);
+ __ subl(keylen, 2); // zero now means 13 128-bit words (52 ints)
+ __ jcc(Assembler::equal, L_doLast);
+ // only in 256 bit keys
+ aes_dec_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
+ aes_dec_key(xmm_result, xmm_temp, key, 0xe0, xmm_key_shuf_mask);
+
+ __ BIND(L_doLast);
+ // for decryption the aesdeclast operation is always on key+0x00
+ load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
+ __ aesdeclast(xmm_result, xmm_temp);
+
+ __ movdqu(Address(to, 0), xmm_result); // store the result
+
+ __ xorptr(rax, rax); // return 0
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
+ __ ret(0);
+
+ return start;
+ }
+
+
+ // Emits the AES CBC-encrypt stub (one block per loop iteration) and returns its entry address.
+ //
+ // Inputs:
+ // c_rarg0 - source byte array address
+ // c_rarg1 - destination byte array address
+ // c_rarg2 - K (key) in little endian int array
+ // c_rarg3 - r vector byte array address (read on entry, overwritten with the last output block on exit)
+ // c_rarg4 - input length in bytes (read from the stack on Win64)
+ //
+ address generate_cipherBlockChaining_encryptAESCrypt() {
+ assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
+ address start = __ pc();
+
+ Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256;
+ const Register from = c_rarg0; // source array address
+ const Register to = c_rarg1; // destination array address
+ const Register key = c_rarg2; // key array address
+ const Register rvec = c_rarg3; // r byte array initialized from initvector array address
+ // and left with the results of the last encryption block
+#ifndef _WIN64
+ const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16)
+#else
+ const Address len_mem(rsp, 6 * wordSize); // length is on stack on Win64
+ const Register len_reg = r10; // pick the first volatile windows register
+#endif
+ const Register pos = rax; // byte offset into from/to; rax first carries the key length (see below)
+
+ // xmm register assignments for the loops below
+ const XMMRegister xmm_result = xmm0;
+ const XMMRegister xmm_temp = xmm1;
+ // keys 0-10 preloaded into xmm2-xmm12
+ const int XMM_REG_NUM_KEY_FIRST = 2;
+ const int XMM_REG_NUM_KEY_LAST = 12;
+ const XMMRegister xmm_key0 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
+ const XMMRegister xmm_key10 = as_XMMRegister(XMM_REG_NUM_KEY_LAST);
+
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+#ifdef _WIN64
+ // on win64, fill len_reg from stack position
+ __ movl(len_reg, len_mem);
+ // save the xmm registers which must be preserved 6-12
+ __ subptr(rsp, -rsp_after_call_off * wordSize);
+ for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
+ __ movdqu(xmm_save(i), as_XMMRegister(i));
+ }
+#endif
+
+ const XMMRegister xmm_key_shuf_mask = xmm_temp; // used temporarily to swap key bytes up front
+ __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+ // load up xmm regs 2 thru 12 with key 0x00 - 0xa0
+ for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+ load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
+ offset += 0x10;
+ }
+
+ __ movdqu(xmm_result, Address(rvec, 0x00)); // initialize xmm_result with r vec
+
+ // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (44=128, 52=192, or 60=256))
+ __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+ __ cmpl(rax, 44);
+ __ jcc(Assembler::notEqual, L_key_192_256);
+
+ // 128 bit code follows here
+ __ movptr(pos, 0); // rax is reused as pos from here on; the key length is no longer needed on this path
+ __ align(OptoLoopAlignment);
+ __ BIND(L_loopTop_128); // NOTE(review): loop is tested at the bottom, so it assumes len is a nonzero multiple of 16 on entry - confirm with callers
+ __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
+ __ pxor (xmm_result, xmm_temp); // xor with the current r vector
+
+ __ pxor (xmm_result, xmm_key0); // do the aes rounds
+ for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) {
+ __ aesenc(xmm_result, as_XMMRegister(rnum));
+ }
+ __ aesenclast(xmm_result, xmm_key10);
+
+ __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
+ // no need to store r to memory until we exit
+ __ addptr(pos, AESBlockSize);
+ __ subptr(len_reg, AESBlockSize);
+ __ jcc(Assembler::notEqual, L_loopTop_128);
+
+ __ BIND(L_exit); // shared epilogue; the 192- and 256-bit paths jump back here
+ __ movdqu(Address(rvec, 0), xmm_result); // final value of r stored in rvec of CipherBlockChaining object
+
+#ifdef _WIN64
+ // restore xmm regs belonging to calling function
+ for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
+ __ movdqu(as_XMMRegister(i), xmm_save(i));
+ }
+#endif
+ __ movl(rax, 0); // return 0 (why?)
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
+ __ ret(0);
+
+ __ BIND(L_key_192_256);
+ // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
+ __ cmpl(rax, 52);
+ __ jcc(Assembler::notEqual, L_key_256);
+
+ // 192-bit code follows here (could be changed to use more xmm registers)
+ __ movptr(pos, 0);
+ __ align(OptoLoopAlignment);
+ __ BIND(L_loopTop_192); // bottom-tested loop, same entry assumption as the 128-bit loop
+ __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
+ __ pxor (xmm_result, xmm_temp); // xor with the current r vector
+
+ __ pxor (xmm_result, xmm_key0); // do the aes rounds
+ for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+ __ aesenc(xmm_result, as_XMMRegister(rnum));
+ }
+ aes_enc_key(xmm_result, xmm_temp, key, 0xb0); // no mask register passed: xmm_temp has been overwritten with input, so load_key reads the mask from memory
+ load_key(xmm_temp, key, 0xc0);
+ __ aesenclast(xmm_result, xmm_temp);
+
+ __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
+ // no need to store r to memory until we exit
+ __ addptr(pos, AESBlockSize);
+ __ subptr(len_reg, AESBlockSize);
+ __ jcc(Assembler::notEqual, L_loopTop_192);
+ __ jmp(L_exit);
+
+ __ BIND(L_key_256);
+ // 256-bit code follows here (could be changed to use more xmm registers)
+ __ movptr(pos, 0);
+ __ align(OptoLoopAlignment);
+ __ BIND(L_loopTop_256); // bottom-tested loop, same entry assumption as the 128-bit loop
+ __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
+ __ pxor (xmm_result, xmm_temp); // xor with the current r vector
+
+ __ pxor (xmm_result, xmm_key0); // do the aes rounds
+ for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+ __ aesenc(xmm_result, as_XMMRegister(rnum));
+ }
+ aes_enc_key(xmm_result, xmm_temp, key, 0xb0); // round keys beyond 0xa0 are reloaded from memory on every iteration
+ aes_enc_key(xmm_result, xmm_temp, key, 0xc0);
+ aes_enc_key(xmm_result, xmm_temp, key, 0xd0);
+ load_key(xmm_temp, key, 0xe0);
+ __ aesenclast(xmm_result, xmm_temp);
+
+ __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
+ // no need to store r to memory until we exit
+ __ addptr(pos, AESBlockSize);
+ __ subptr(len_reg, AESBlockSize);
+ __ jcc(Assembler::notEqual, L_loopTop_256);
+ __ jmp(L_exit);
+
+ return start;
+ }
+
+
+
+ // This is a version of CBC/AES Decrypt which does 4 blocks in a loop at a time
+ // to hide instruction latency (only the 128-bit key path is 4-way; 192/256-bit keys go one block at a time)
+ //
+ // Arguments:
+ //
+ // Inputs:
+ // c_rarg0 - source byte array address
+ // c_rarg1 - destination byte array address
+ // c_rarg2 - K (key) in little endian int array
+ // c_rarg3 - r vector byte array address (read on entry, overwritten on exit)
+ // c_rarg4 - input length in bytes (read from the stack on Win64)
+ //
+
+ address generate_cipherBlockChaining_decryptAESCrypt_Parallel() {
+ assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
+ address start = __ pc();
+
+ Label L_exit, L_key_192_256, L_key_256;
+ Label L_singleBlock_loopTop_128, L_multiBlock_loopTop_128;
+ Label L_singleBlock_loopTop_192, L_singleBlock_loopTop_256;
+ const Register from = c_rarg0; // source array address
+ const Register to = c_rarg1; // destination array address
+ const Register key = c_rarg2; // key array address
+ const Register rvec = c_rarg3; // r byte array initialized from initvector array address
+ // and left holding the most recent ciphertext (input) block on exit
+#ifndef _WIN64
+ const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16)
+#else
+ const Address len_mem(rsp, 6 * wordSize); // length is on stack on Win64
+ const Register len_reg = r10; // pick the first volatile windows register
+#endif
+ const Register pos = rax; // byte offset into from/to; rax first carries the key length (see below)
+
+ // xmm register assignments for the loops below
+ const XMMRegister xmm_result = xmm0;
+ // keys preloaded into xmm5-xmm15: key+0x10..key+0xa0 in xmm5-xmm14, key+0x00 in xmm15
+ const int XMM_REG_NUM_KEY_FIRST = 5;
+ const int XMM_REG_NUM_KEY_LAST = 15;
+ const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
+ const XMMRegister xmm_key_last = as_XMMRegister(XMM_REG_NUM_KEY_LAST);
+
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+#ifdef _WIN64
+ // on win64, fill len_reg from stack position
+ __ movl(len_reg, len_mem);
+ // save the xmm registers which must be preserved 6-15
+ __ subptr(rsp, -rsp_after_call_off * wordSize);
+ for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
+ __ movdqu(xmm_save(i), as_XMMRegister(i));
+ }
+#endif
+ // the java expanded key ordering is rotated one position from what we want
+ // so we start from 0x10 here and hit 0x00 last
+ const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front
+ __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+ // load up xmm regs 5 thru 15 with key 0x10 - 0xa0 - 0x00
+ for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+ if (rnum == XMM_REG_NUM_KEY_LAST) offset = 0x00; // the last register gets the key at offset 0, used by aesdeclast
+ load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
+ offset += 0x10;
+ }
+
+ const XMMRegister xmm_prev_block_cipher = xmm1; // holds cipher of previous block (xmm1 is reused; the shuffle mask is overwritten below)
+ // registers holding the four results in the parallelized loop
+ const XMMRegister xmm_result0 = xmm0;
+ const XMMRegister xmm_result1 = xmm2;
+ const XMMRegister xmm_result2 = xmm3;
+ const XMMRegister xmm_result3 = xmm4;
+
+ __ movdqu(xmm_prev_block_cipher, Address(rvec, 0x00)); // initialize with initial rvec
+
+ // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (44=128, 52=192, or 60=256))
+ __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+ __ cmpl(rax, 44);
+ __ jcc(Assembler::notEqual, L_key_192_256);
+
+
+ // 128-bit code follows here, parallelized
+ __ movptr(pos, 0); // rax is reused as pos from here on; the key length is no longer needed on this path
+ __ align(OptoLoopAlignment);
+ __ BIND(L_multiBlock_loopTop_128);
+ __ cmpptr(len_reg, 4*AESBlockSize); // see if at least 4 blocks left
+ __ jcc(Assembler::less, L_singleBlock_loopTop_128);
+
+ __ movdqu(xmm_result0, Address(from, pos, Address::times_1, 0*AESBlockSize)); // get next 4 blocks into xmmresult registers
+ __ movdqu(xmm_result1, Address(from, pos, Address::times_1, 1*AESBlockSize));
+ __ movdqu(xmm_result2, Address(from, pos, Address::times_1, 2*AESBlockSize));
+ __ movdqu(xmm_result3, Address(from, pos, Address::times_1, 3*AESBlockSize));
+
+#define DoFour(opc, src_reg) \
+ __ opc(xmm_result0, src_reg); \
+ __ opc(xmm_result1, src_reg); \
+ __ opc(xmm_result2, src_reg); \
+ __ opc(xmm_result3, src_reg);
+
+ DoFour(pxor, xmm_key_first);
+ for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) {
+ DoFour(aesdec, as_XMMRegister(rnum));
+ }
+ DoFour(aesdeclast, xmm_key_last);
+ // for each result, xor with the r vector of previous cipher block
+ __ pxor(xmm_result0, xmm_prev_block_cipher);
+ __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 0*AESBlockSize)); // re-read the ciphertext block from memory; its register copy was decrypted in place
+ __ pxor(xmm_result1, xmm_prev_block_cipher);
+ __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 1*AESBlockSize));
+ __ pxor(xmm_result2, xmm_prev_block_cipher);
+ __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 2*AESBlockSize));
+ __ pxor(xmm_result3, xmm_prev_block_cipher);
+ __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 3*AESBlockSize)); // this will carry over to next set of blocks
+
+ __ movdqu(Address(to, pos, Address::times_1, 0*AESBlockSize), xmm_result0); // store 4 results into the next 64 bytes of output
+ __ movdqu(Address(to, pos, Address::times_1, 1*AESBlockSize), xmm_result1);
+ __ movdqu(Address(to, pos, Address::times_1, 2*AESBlockSize), xmm_result2);
+ __ movdqu(Address(to, pos, Address::times_1, 3*AESBlockSize), xmm_result3);
+
+ __ addptr(pos, 4*AESBlockSize);
+ __ subptr(len_reg, 4*AESBlockSize);
+ __ jmp(L_multiBlock_loopTop_128);
+
+ // registers used in the non-parallelized loops
+ const XMMRegister xmm_prev_block_cipher_save = xmm2;
+ const XMMRegister xmm_temp = xmm3;
+
+ __ align(OptoLoopAlignment);
+ __ BIND(L_singleBlock_loopTop_128); // top-tested loop handling the remaining 0-3 blocks
+ __ cmpptr(len_reg, 0); // any blocks left??
+ __ jcc(Assembler::equal, L_exit);
+ __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
+ __ movdqa(xmm_prev_block_cipher_save, xmm_result); // save for next r vector
+ __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds
+ for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) {
+ __ aesdec(xmm_result, as_XMMRegister(rnum));
+ }
+ __ aesdeclast(xmm_result, xmm_key_last);
+ __ pxor (xmm_result, xmm_prev_block_cipher); // xor with the current r vector
+ __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
+ // no need to store r to memory until we exit
+ __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block
+
+ __ addptr(pos, AESBlockSize);
+ __ subptr(len_reg, AESBlockSize);
+ __ jmp(L_singleBlock_loopTop_128);
+
+
+ __ BIND(L_exit); // shared epilogue; the 192- and 256-bit paths jump back here
+ __ movdqu(Address(rvec, 0), xmm_prev_block_cipher); // final value of r stored in rvec of CipherBlockChaining object
+#ifdef _WIN64
+ // restore regs belonging to calling function
+ for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
+ __ movdqu(as_XMMRegister(i), xmm_save(i));
+ }
+#endif
+ __ movl(rax, 0); // return 0 (why?)
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
+ __ ret(0);
+
+
+ __ BIND(L_key_192_256);
+ // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
+ __ cmpl(rax, 52);
+ __ jcc(Assembler::notEqual, L_key_256);
+
+ // 192-bit code follows here (could be optimized to use parallelism)
+ __ movptr(pos, 0);
+ __ align(OptoLoopAlignment);
+ __ BIND(L_singleBlock_loopTop_192); // NOTE(review): tested at the bottom, unlike the 128-bit loop; assumes len is nonzero on entry - confirm with callers
+ __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
+ __ movdqa(xmm_prev_block_cipher_save, xmm_result); // save for next r vector
+ __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds
+ for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) {
+ __ aesdec(xmm_result, as_XMMRegister(rnum));
+ }
+ aes_dec_key(xmm_result, xmm_temp, key, 0xb0); // 192-bit key goes up to c0
+ aes_dec_key(xmm_result, xmm_temp, key, 0xc0);
+ __ aesdeclast(xmm_result, xmm_key_last); // xmm15 always came from key+0
+ __ pxor (xmm_result, xmm_prev_block_cipher); // xor with the current r vector
+ __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
+ // no need to store r to memory until we exit
+ __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block
+
+ __ addptr(pos, AESBlockSize);
+ __ subptr(len_reg, AESBlockSize);
+ __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192);
+ __ jmp(L_exit);
+
+ __ BIND(L_key_256);
+ // 256-bit code follows here (could be optimized to use parallelism)
+ __ movptr(pos, 0);
+ __ align(OptoLoopAlignment);
+ __ BIND(L_singleBlock_loopTop_256); // bottom-tested loop, same entry assumption as the 192-bit loop
+ __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
+ __ movdqa(xmm_prev_block_cipher_save, xmm_result); // save for next r vector
+ __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds
+ for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) {
+ __ aesdec(xmm_result, as_XMMRegister(rnum));
+ }
+ aes_dec_key(xmm_result, xmm_temp, key, 0xb0); // 256-bit key goes up to e0
+ aes_dec_key(xmm_result, xmm_temp, key, 0xc0);
+ aes_dec_key(xmm_result, xmm_temp, key, 0xd0);
+ aes_dec_key(xmm_result, xmm_temp, key, 0xe0);
+ __ aesdeclast(xmm_result, xmm_key_last); // xmm15 came from key+0
+ __ pxor (xmm_result, xmm_prev_block_cipher); // xor with the current r vector
+ __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
+ // no need to store r to memory until we exit
+ __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block
+
+ __ addptr(pos, AESBlockSize);
+ __ subptr(len_reg, AESBlockSize);
+ __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256);
+ __ jmp(L_exit);
+
+ return start;
+ }
+
+
+
#undef __
#define __ masm->
@@ -3135,6 +3677,16 @@
generate_arraycopy_stubs();
generate_math_stubs();
+
+ // don't bother generating these AES intrinsic stubs unless global flag is set
+ if (UseAESIntrinsics) {
+ StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // needed by the others
+
+ StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
+ StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
+ StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
+ StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
+ }
}
public:
--- a/hotspot/src/cpu/x86/vm/stubRoutines_x86_32.cpp Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/src/cpu/x86/vm/stubRoutines_x86_32.cpp Fri Oct 26 12:06:55 2012 -0700
@@ -44,3 +44,4 @@
address StubRoutines::x86::_verify_mxcsr_entry = NULL;
address StubRoutines::x86::_verify_fpu_cntrl_wrd_entry = NULL;
+address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
--- a/hotspot/src/cpu/x86/vm/stubRoutines_x86_32.hpp Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/src/cpu/x86/vm/stubRoutines_x86_32.hpp Fri Oct 26 12:06:55 2012 -0700
@@ -41,10 +41,14 @@
private:
static address _verify_mxcsr_entry;
static address _verify_fpu_cntrl_wrd_entry;
+ // shuffle mask for fixing up 128-bit words consisting of big-endian 32-bit integers
+ static address _key_shuffle_mask_addr;
public:
static address verify_mxcsr_entry() { return _verify_mxcsr_entry; }
static address verify_fpu_cntrl_wrd_entry() { return _verify_fpu_cntrl_wrd_entry; }
+ static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; }
+
};
static bool returns_to_call_stub(address return_pc) { return return_pc == _call_stub_return_address; }
--- a/hotspot/src/cpu/x86/vm/stubRoutines_x86_64.cpp Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/src/cpu/x86/vm/stubRoutines_x86_64.cpp Fri Oct 26 12:06:55 2012 -0700
@@ -56,3 +56,4 @@
address StubRoutines::x86::_double_sign_mask = NULL;
address StubRoutines::x86::_double_sign_flip = NULL;
address StubRoutines::x86::_mxcsr_std = NULL;
+address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
--- a/hotspot/src/cpu/x86/vm/stubRoutines_x86_64.hpp Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/src/cpu/x86/vm/stubRoutines_x86_64.hpp Fri Oct 26 12:06:55 2012 -0700
@@ -54,6 +54,8 @@
static address _double_sign_mask;
static address _double_sign_flip;
static address _mxcsr_std;
+ // shuffle mask for fixing up 128-bit words consisting of big-endian 32-bit integers
+ static address _key_shuffle_mask_addr;
public:
@@ -116,6 +118,9 @@
{
return _mxcsr_std;
}
+
+ static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; }
+
};
#endif // CPU_X86_VM_STUBROUTINES_X86_64_HPP
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp Fri Oct 26 12:06:55 2012 -0700
@@ -419,13 +419,16 @@
if (UseAVX < 1)
_cpuFeatures &= ~CPU_AVX;
+ if (!UseAES && !FLAG_IS_DEFAULT(UseAES))
+ _cpuFeatures &= ~CPU_AES;
+
if (logical_processors_per_package() == 1) {
// HT processor could be installed on a system which doesn't support HT.
_cpuFeatures &= ~CPU_HT;
}
char buf[256];
- jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+ jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
cores_per_cpu(), threads_per_core(),
cpu_family(), _model, _stepping,
(supports_cmov() ? ", cmov" : ""),
@@ -441,6 +444,7 @@
(supports_popcnt() ? ", popcnt" : ""),
(supports_avx() ? ", avx" : ""),
(supports_avx2() ? ", avx2" : ""),
+ (supports_aes() ? ", aes" : ""),
(supports_mmx_ext() ? ", mmxext" : ""),
(supports_3dnow_prefetch() ? ", 3dnowpref" : ""),
(supports_lzcnt() ? ", lzcnt": ""),
@@ -472,6 +476,29 @@
if (!supports_avx ()) // Drop to 0 if no AVX support
UseAVX = 0;
+ // Use AES instructions if available.
+ if (supports_aes()) {
+ if (FLAG_IS_DEFAULT(UseAES)) {
+ UseAES = true;
+ }
+ } else if (UseAES) {
+ if (!FLAG_IS_DEFAULT(UseAES))
+ warning("AES instructions not available on this CPU");
+ FLAG_SET_DEFAULT(UseAES, false);
+ }
+
+ // The AES intrinsic stubs require AES instruction support (of course)
+ // but also require AVX mode for misaligned SSE access
+ if (UseAES && (UseAVX > 0)) {
+ if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+ UseAESIntrinsics = true;
+ }
+ } else if (UseAESIntrinsics) {
+ if (!FLAG_IS_DEFAULT(UseAESIntrinsics))
+ warning("AES intrinsics not available on this CPU");
+ FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+ }
+
#ifdef COMPILER2
if (UseFPUForSpilling) {
if (UseSSE < 2) {
@@ -714,6 +741,9 @@
if (UseAVX > 0) {
tty->print(" UseAVX=%d",UseAVX);
}
+ if (UseAES) {
+ tty->print(" UseAES=1");
+ }
tty->cr();
tty->print("Allocation");
if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) {
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.hpp Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.hpp Fri Oct 26 12:06:55 2012 -0700
@@ -78,7 +78,9 @@
sse4_2 : 1,
: 2,
popcnt : 1,
- : 3,
+ : 1,
+ aes : 1,
+ : 1,
osxsave : 1,
avx : 1,
: 3;
@@ -244,7 +246,8 @@
CPU_TSC = (1 << 15),
CPU_TSCINV = (1 << 16),
CPU_AVX = (1 << 17),
- CPU_AVX2 = (1 << 18)
+ CPU_AVX2 = (1 << 18),
+ CPU_AES = (1 << 19)
} cpuFeatureFlags;
enum {
@@ -420,6 +423,8 @@
result |= CPU_TSC;
if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
result |= CPU_TSCINV;
+ if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0)
+ result |= CPU_AES;
// AMD features.
if (is_amd()) {
@@ -544,6 +549,7 @@
static bool supports_avx() { return (_cpuFeatures & CPU_AVX) != 0; }
static bool supports_avx2() { return (_cpuFeatures & CPU_AVX2) != 0; }
static bool supports_tsc() { return (_cpuFeatures & CPU_TSC) != 0; }
+ static bool supports_aes() { return (_cpuFeatures & CPU_AES) != 0; }
// Intel features
static bool is_intel_family_core() { return is_intel() &&
--- a/hotspot/src/cpu/x86/vm/x86.ad Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/src/cpu/x86/vm/x86.ad Fri Oct 26 12:06:55 2012 -0700
@@ -4102,9 +4102,158 @@
// ----------------------- LogicalRightShift -----------------------------------
-// Shorts/Chars vector logical right shift produces incorrect Java result
+// Shorts vector logical right shift produces incorrect Java result
// for negative data because java code convert short value into int with
-// sign extension before a shift.
+// sign extension before a shift. But char vectors are fine since chars are
+// unsigned values.
+
+instruct vsrl2S(vecS dst, vecS shift) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (URShiftVS dst shift));
+ format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
+ ins_encode %{
+ __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); // two-operand form: $dst is both the input vector and the result
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2S_imm(vecS dst, immI8 shift) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (URShiftVS dst shift));
+ format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
+ ins_encode %{
+ __ psrlw($dst$$XMMRegister, (int)$shift$$constant); // two-operand form; the shift count is the immI8 constant
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (URShiftVS src shift));
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
+ ins_encode %{
+ bool vector256 = false; // operands are vecS, not 256-bit vecY
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); // three-operand form: $src is left unmodified
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (URShiftVS src shift));
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
+ ins_encode %{
+ bool vector256 = false; // operands are vecS, not 256-bit vecY
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); // three-operand form with an immI8 shift count
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4S(vecD dst, vecS shift) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (URShiftVS dst shift));
+ format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
+ ins_encode %{
+ __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); // two-operand form: $dst is both the input vector and the result
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4S_imm(vecD dst, immI8 shift) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (URShiftVS dst shift));
+ format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
+ ins_encode %{
+ __ psrlw($dst$$XMMRegister, (int)$shift$$constant); // two-operand form; the shift count is the immI8 constant
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (URShiftVS src shift));
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
+ ins_encode %{
+ bool vector256 = false; // operands are vecD, not 256-bit vecY
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); // three-operand form: $src is left unmodified
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (URShiftVS src shift));
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
+ ins_encode %{
+ bool vector256 = false; // operands are vecD, not 256-bit vecY
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); // three-operand form with an immI8 shift count
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl8S(vecX dst, vecS shift) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (URShiftVS dst shift));
+ format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
+ ins_encode %{
+ __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); // two-operand form: $dst is both the input vector and the result
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl8S_imm(vecX dst, immI8 shift) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (URShiftVS dst shift));
+ format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
+ ins_encode %{
+ __ psrlw($dst$$XMMRegister, (int)$shift$$constant); // two-operand form; the shift count is the immI8 constant
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+ match(Set dst (URShiftVS src shift));
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
+ ins_encode %{
+ bool vector256 = false; // operands are vecX, not 256-bit vecY
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); // three-operand form: $src is left unmodified
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+ match(Set dst (URShiftVS src shift));
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
+ ins_encode %{
+ bool vector256 = false; // operands are vecX, not 256-bit vecY
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); // three-operand form with an immI8 shift count
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+ match(Set dst (URShiftVS src shift));
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
+ ins_encode %{
+ bool vector256 = true; // operands are 256-bit vecY; the predicate requires UseAVX > 1 for this rule
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); // three-operand form: $src is left unmodified
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+ match(Set dst (URShiftVS src shift));
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
+ ins_encode %{
+ bool vector256 = true; // operands are 256-bit vecY; the predicate requires UseAVX > 1 for this rule
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); // three-operand form with an immI8 shift count
+ %}
+ ins_pipe( pipe_slow );
+%}
// Integers vector logical right shift
instruct vsrl2I(vecD dst, vecS shift) %{
--- a/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp Fri Oct 26 12:06:55 2012 -0700
@@ -1844,17 +1844,12 @@
code == Bytecodes::_invokevirtual && target->is_final_method() ||
code == Bytecodes::_invokedynamic) {
ciMethod* inline_target = (cha_monomorphic_target != NULL) ? cha_monomorphic_target : target;
- bool success = false;
- if (target->is_method_handle_intrinsic()) {
- // method handle invokes
- success = try_method_handle_inline(target);
- } else {
- // static binding => check if callee is ok
- success = try_inline(inline_target, (cha_monomorphic_target != NULL) || (exact_target != NULL), code, better_receiver);
- }
+ // static binding => check if callee is ok
+ bool success = try_inline(inline_target, (cha_monomorphic_target != NULL) || (exact_target != NULL), code, better_receiver);
+
CHECK_BAILOUT();
-
clear_inline_bailout();
+
if (success) {
// Register dependence if JVMTI has either breakpoint
// setting or hotswapping of methods capabilities since they may
@@ -3201,6 +3196,11 @@
return false;
}
+ // method handle invokes
+ if (callee->is_method_handle_intrinsic()) {
+ return try_method_handle_inline(callee);
+ }
+
// handle intrinsics
if (callee->intrinsic_id() != vmIntrinsics::_none) {
if (try_inline_intrinsics(callee)) {
@@ -3885,10 +3885,14 @@
ValueType* type = state()->stack_at(args_base)->type();
if (type->is_constant()) {
ciMethod* target = type->as_ObjectType()->constant_value()->as_method_handle()->get_vmtarget();
- guarantee(!target->is_method_handle_intrinsic(), "should not happen"); // XXX remove
- Bytecodes::Code bc = target->is_static() ? Bytecodes::_invokestatic : Bytecodes::_invokevirtual;
- if (try_inline(target, /*holder_known*/ true, bc)) {
- return true;
+ // We don't do CHA here so only inline static and statically bindable methods.
+ if (target->is_static() || target->can_be_statically_bound()) {
+ Bytecodes::Code bc = target->is_static() ? Bytecodes::_invokestatic : Bytecodes::_invokevirtual;
+ if (try_inline(target, /*holder_known*/ true, bc)) {
+ return true;
+ }
+ } else {
+ print_inlining(target, "not static or statically bindable", /*success*/ false);
}
} else {
print_inlining(callee, "receiver not constant", /*success*/ false);
@@ -3941,9 +3945,14 @@
}
j += t->size(); // long and double take two slots
}
- Bytecodes::Code bc = target->is_static() ? Bytecodes::_invokestatic : Bytecodes::_invokevirtual;
- if (try_inline(target, /*holder_known*/ true, bc)) {
- return true;
+ // We don't do CHA here so only inline static and statically bindable methods.
+ if (target->is_static() || target->can_be_statically_bound()) {
+ Bytecodes::Code bc = target->is_static() ? Bytecodes::_invokestatic : Bytecodes::_invokevirtual;
+ if (try_inline(target, /*holder_known*/ true, bc)) {
+ return true;
+ }
+ } else {
+ print_inlining(target, "not static or statically bindable", /*success*/ false);
}
}
} else {
--- a/hotspot/src/share/vm/classfile/vmSymbols.hpp Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp Fri Oct 26 12:06:55 2012 -0700
@@ -110,6 +110,7 @@
template(sun_jkernel_DownloadManager, "sun/jkernel/DownloadManager") \
template(getBootClassPathEntryForClass_name, "getBootClassPathEntryForClass") \
template(sun_misc_PostVMInitHook, "sun/misc/PostVMInitHook") \
+ template(sun_misc_Launcher_ExtClassLoader, "sun/misc/Launcher$ExtClassLoader") \
\
/* Java runtime version access */ \
template(sun_misc_Version, "sun/misc/Version") \
@@ -723,6 +724,21 @@
/* java/lang/ref/Reference */ \
do_intrinsic(_Reference_get, java_lang_ref_Reference, get_name, void_object_signature, F_R) \
\
+ /* support for com.sun.crypto.provider.AESCrypt and some of its callers */ \
+ do_class(com_sun_crypto_provider_aescrypt, "com/sun/crypto/provider/AESCrypt") \
+ do_intrinsic(_aescrypt_encryptBlock, com_sun_crypto_provider_aescrypt, encryptBlock_name, byteArray_int_byteArray_int_signature, F_R) \
+ do_intrinsic(_aescrypt_decryptBlock, com_sun_crypto_provider_aescrypt, decryptBlock_name, byteArray_int_byteArray_int_signature, F_R) \
+ do_name( encryptBlock_name, "encryptBlock") \
+ do_name( decryptBlock_name, "decryptBlock") \
+ do_signature(byteArray_int_byteArray_int_signature, "([BI[BI)V") \
+ \
+ do_class(com_sun_crypto_provider_cipherBlockChaining, "com/sun/crypto/provider/CipherBlockChaining") \
+ do_intrinsic(_cipherBlockChaining_encryptAESCrypt, com_sun_crypto_provider_cipherBlockChaining, encrypt_name, byteArray_int_int_byteArray_int_signature, F_R) \
+ do_intrinsic(_cipherBlockChaining_decryptAESCrypt, com_sun_crypto_provider_cipherBlockChaining, decrypt_name, byteArray_int_int_byteArray_int_signature, F_R) \
+ do_name( encrypt_name, "encrypt") \
+ do_name( decrypt_name, "decrypt") \
+ do_signature(byteArray_int_int_byteArray_int_signature, "([BII[BI)V") \
+ \
/* support for sun.misc.Unsafe */ \
do_class(sun_misc_Unsafe, "sun/misc/Unsafe") \
\
--- a/hotspot/src/share/vm/oops/method.cpp Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/src/share/vm/oops/method.cpp Fri Oct 26 12:06:55 2012 -0700
@@ -1155,8 +1155,12 @@
vmSymbols::SID Method::klass_id_for_intrinsics(Klass* holder) {
// if loader is not the default loader (i.e., != NULL), we can't know the intrinsics
// because we are not loading from core libraries
- if (InstanceKlass::cast(holder)->class_loader() != NULL)
+ // exception: the AES intrinsics come from lib/ext/sunjce_provider.jar
+ // which is not loaded by the default (NULL) class loader, so we check for its loader here
+ if ((InstanceKlass::cast(holder)->class_loader() != NULL) &&
+ InstanceKlass::cast(holder)->class_loader()->klass()->name() != vmSymbols::sun_misc_Launcher_ExtClassLoader()) {
return vmSymbols::NO_SID; // regardless of name, no intrinsics here
+ }
// see if the klass name is well-known:
Symbol* klass_name = InstanceKlass::cast(holder)->name();
--- a/hotspot/src/share/vm/opto/c2_globals.hpp Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/src/share/vm/opto/c2_globals.hpp Fri Oct 26 12:06:55 2012 -0700
@@ -439,6 +439,9 @@
product(bool, DoEscapeAnalysis, true, \
"Perform escape analysis") \
\
+ develop(bool, ExitEscapeAnalysisOnTimeout, true, \
+ "Exit or throw assert in EA when it reaches time limit") \
+ \
notproduct(bool, PrintEscapeAnalysis, false, \
"Print the results of escape analysis") \
\
--- a/hotspot/src/share/vm/opto/callGenerator.cpp Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/src/share/vm/opto/callGenerator.cpp Fri Oct 26 12:06:55 2012 -0700
@@ -670,6 +670,129 @@
}
+//------------------------PredictedIntrinsicGenerator------------------------------
+// Internal class which handles all predicted intrinsic calls by pairing an intrinsic generator with a fallback generator.
+class PredictedIntrinsicGenerator : public CallGenerator {
+ CallGenerator* _intrinsic; // provides generate_predicate() and the intrinsic code in generate()
+ CallGenerator* _cg; // generator used for the slow path in generate(); also supplies method()
+
+public:
+ PredictedIntrinsicGenerator(CallGenerator* intrinsic,
+ CallGenerator* cg)
+ : CallGenerator(cg->method()) // the wrapped method is taken from the fallback generator
+ {
+ _intrinsic = intrinsic;
+ _cg = cg;
+ }
+
+ virtual bool is_virtual() const { return true; }
+ virtual bool is_inlined() const { return true; }
+ virtual bool is_intrinsic() const { return true; }
+
+ virtual JVMState* generate(JVMState* jvms); // emits predicate, slow path and intrinsic path
+};
+
+// Factory: wraps an intrinsic generator and a fallback generator in a new PredictedIntrinsicGenerator.
+CallGenerator* CallGenerator::for_predicted_intrinsic(CallGenerator* intrinsic,
+ CallGenerator* cg) {
+ return new PredictedIntrinsicGenerator(intrinsic, cg);
+}
+
+// Emits the predicate from _intrinsic, the slow path from _cg, the intrinsic fast path, and merges both paths.
+JVMState* PredictedIntrinsicGenerator::generate(JVMState* jvms) {
+ GraphKit kit(jvms);
+ PhaseGVN& gvn = kit.gvn();
+
+ CompileLog* log = kit.C->log();
+ if (log != NULL) {
+ log->elem("predicted_intrinsic bci='%d' method='%d'",
+ jvms->bci(), log->identify(method()));
+ }
+
+ Node* slow_ctl = _intrinsic->generate_predicate(kit.sync_jvms()); // slow-path control, or NULL
+ if (kit.failing())
+ return NULL; // might happen because of NodeCountInliningCutoff
+
+ SafePointNode* slow_map = NULL;
+ JVMState* slow_jvms = NULL; // only assigned when the slow path is actually generated below
+ if (slow_ctl != NULL) {
+ PreserveJVMState pjvms(&kit);
+ kit.set_control(slow_ctl);
+ if (!kit.stopped()) {
+ slow_jvms = _cg->generate(kit.sync_jvms());
+ if (kit.failing())
+ return NULL; // might happen because of NodeCountInliningCutoff
+ assert(slow_jvms != NULL, "must be");
+ kit.add_exception_states_from(slow_jvms);
+ kit.set_map(slow_jvms->map());
+ if (!kit.stopped())
+ slow_map = kit.stop(); // remember the slow-path map for the merge below
+ }
+ }
+
+ if (kit.stopped()) {
+ // Predicate is always false.
+ kit.set_jvms(slow_jvms); // NOTE(review): slow_jvms is NULL if no slow path was generated - confirm unreachable
+ return kit.transfer_exceptions_into_jvms();
+ }
+
+ // Generate intrinsic code:
+ JVMState* new_jvms = _intrinsic->generate(kit.sync_jvms());
+ if (new_jvms == NULL) {
+ // Intrinsic failed, so use slow code or make a direct call.
+ if (slow_map == NULL) {
+ CallGenerator* cg = CallGenerator::for_direct_call(method());
+ new_jvms = cg->generate(kit.sync_jvms());
+ } else {
+ kit.set_jvms(slow_jvms); // slow_map != NULL implies slow_jvms was assigned above
+ return kit.transfer_exceptions_into_jvms();
+ }
+ }
+ kit.add_exception_states_from(new_jvms);
+ kit.set_jvms(new_jvms);
+
+ // Need to merge slow and fast?
+ if (slow_map == NULL) {
+ // The fast path is the only path remaining.
+ return kit.transfer_exceptions_into_jvms();
+ }
+
+ if (kit.stopped()) {
+ // Intrinsic method threw an exception, so it's just the slow path after all.
+ kit.set_jvms(slow_jvms);
+ return kit.transfer_exceptions_into_jvms();
+ }
+
+ // Finish the diamond.
+ kit.C->set_has_split_ifs(true); // Has chance for split-if optimization
+ RegionNode* region = new (kit.C) RegionNode(3);
+ region->init_req(1, kit.control()); // input 1: fast (intrinsic) path
+ region->init_req(2, slow_map->control()); // input 2: slow path
+ kit.set_control(gvn.transform(region));
+ Node* iophi = PhiNode::make(region, kit.i_o(), Type::ABIO);
+ iophi->set_req(2, slow_map->i_o());
+ kit.set_i_o(gvn.transform(iophi));
+ kit.merge_memory(slow_map->merged_memory(), region, 2);
+ uint tos = kit.jvms()->stkoff() + kit.sp();
+ uint limit = slow_map->req();
+ for (uint i = TypeFunc::Parms; i < limit; i++) {
+ // Skip unused stack slots; fast forward to monoff();
+ if (i == tos) {
+ i = kit.jvms()->monoff();
+ if( i >= limit ) break;
+ }
+ Node* m = kit.map()->in(i);
+ Node* n = slow_map->in(i);
+ if (m != n) { // a phi is only needed where the two maps disagree
+ const Type* t = gvn.type(m)->meet(gvn.type(n));
+ Node* phi = PhiNode::make(region, m, t);
+ phi->set_req(2, n);
+ kit.map()->set_req(i, gvn.transform(phi));
+ }
+ }
+ return kit.transfer_exceptions_into_jvms();
+}
+
//-------------------------UncommonTrapCallGenerator-----------------------------
// Internal class which handles all out-of-line calls checking receiver type.
class UncommonTrapCallGenerator : public CallGenerator {
--- a/hotspot/src/share/vm/opto/callGenerator.hpp Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/src/share/vm/opto/callGenerator.hpp Fri Oct 26 12:06:55 2012 -0700
@@ -143,6 +143,9 @@
// Registry for intrinsics:
static CallGenerator* for_intrinsic(ciMethod* m);
static void register_intrinsic(ciMethod* m, CallGenerator* cg);
+ static CallGenerator* for_predicted_intrinsic(CallGenerator* intrinsic,
+ CallGenerator* cg);
+ virtual Node* generate_predicate(JVMState* jvms) { return NULL; };
static void print_inlining(ciMethod* callee, int inline_level, int bci, const char* msg) {
if (PrintInlining)
--- a/hotspot/src/share/vm/opto/compile.cpp Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/src/share/vm/opto/compile.cpp Fri Oct 26 12:06:55 2012 -0700
@@ -3047,9 +3047,9 @@
case T_LONG:
case T_DOUBLE: return (_v._value.j == other._v._value.j);
case T_OBJECT:
- case T_METADATA: return (_v._metadata == other._v._metadata);
case T_ADDRESS: return (_v._value.l == other._v._value.l);
case T_VOID: return (_v._value.l == other._v._value.l); // jump-table entries
+ case T_METADATA: return (_v._metadata == other._v._metadata);
default: ShouldNotReachHere();
}
return false;
--- a/hotspot/src/share/vm/opto/compile.hpp Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/src/share/vm/opto/compile.hpp Fri Oct 26 12:06:55 2012 -0700
@@ -149,7 +149,7 @@
private:
BasicType _type;
union {
- jvalue _value;
+ jvalue _value;
Metadata* _metadata;
} _v;
int _offset; // offset of this constant (in bytes) relative to the constant table base.
--- a/hotspot/src/share/vm/opto/doCall.cpp Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/src/share/vm/opto/doCall.cpp Fri Oct 26 12:06:55 2012 -0700
@@ -107,7 +107,17 @@
// intrinsics handle strict f.p. correctly.
if (allow_inline && allow_intrinsics) {
CallGenerator* cg = find_intrinsic(callee, call_is_virtual);
- if (cg != NULL) return cg;
+ if (cg != NULL) {
+ if (cg->is_predicted()) {
+ // Code without intrinsic but, hopefully, inlined.
+ CallGenerator* inline_cg = this->call_generator(callee,
+ vtable_index, call_is_virtual, jvms, allow_inline, prof_factor, false);
+ if (inline_cg != NULL) {
+ cg = CallGenerator::for_predicted_intrinsic(cg, inline_cg);
+ }
+ }
+ return cg;
+ }
}
// Do method handle calls.
--- a/hotspot/src/share/vm/opto/escape.cpp Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/src/share/vm/opto/escape.cpp Fri Oct 26 12:06:55 2012 -0700
@@ -893,12 +893,16 @@
arg_has_oops && (i > TypeFunc::Parms);
#ifdef ASSERT
if (!(is_arraycopy ||
- call->as_CallLeaf()->_name != NULL &&
- (strcmp(call->as_CallLeaf()->_name, "g1_wb_pre") == 0 ||
- strcmp(call->as_CallLeaf()->_name, "g1_wb_post") == 0 ))
- ) {
+ (call->as_CallLeaf()->_name != NULL &&
+ (strcmp(call->as_CallLeaf()->_name, "g1_wb_pre") == 0 ||
+ strcmp(call->as_CallLeaf()->_name, "g1_wb_post") == 0 ||
+ strcmp(call->as_CallLeaf()->_name, "aescrypt_encryptBlock") == 0 ||
+ strcmp(call->as_CallLeaf()->_name, "aescrypt_decryptBlock") == 0 ||
+ strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_encryptAESCrypt") == 0 ||
+ strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_decryptAESCrypt") == 0)
+ ))) {
call->dump();
- assert(false, "EA: unexpected CallLeaf");
+ fatal(err_msg_res("EA unexpected CallLeaf %s", call->as_CallLeaf()->_name));
}
#endif
// Always process arraycopy's destination object since
@@ -1080,7 +1084,7 @@
C->log()->text("%s", (iterations >= CG_BUILD_ITER_LIMIT) ? "iterations" : "time");
C->log()->end_elem(" limit'");
}
- assert(false, err_msg_res("infinite EA connection graph build (%f sec, %d iterations) with %d nodes and worklist size %d",
+ assert(ExitEscapeAnalysisOnTimeout, err_msg_res("infinite EA connection graph build (%f sec, %d iterations) with %d nodes and worklist size %d",
time.seconds(), iterations, nodes_size(), ptnodes_worklist.length()));
// Possible infinite build_connection_graph loop,
// bailout (no changes to ideal graph were made).
--- a/hotspot/src/share/vm/opto/library_call.cpp Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/src/share/vm/opto/library_call.cpp Fri Oct 26 12:06:55 2012 -0700
@@ -44,18 +44,22 @@
public:
private:
bool _is_virtual;
+ bool _is_predicted;
vmIntrinsics::ID _intrinsic_id;
public:
- LibraryIntrinsic(ciMethod* m, bool is_virtual, vmIntrinsics::ID id)
+ LibraryIntrinsic(ciMethod* m, bool is_virtual, bool is_predicted, vmIntrinsics::ID id)
: InlineCallGenerator(m),
_is_virtual(is_virtual),
+ _is_predicted(is_predicted),
_intrinsic_id(id)
{
}
virtual bool is_intrinsic() const { return true; }
virtual bool is_virtual() const { return _is_virtual; }
+ virtual bool is_predicted() const { return _is_predicted; }
virtual JVMState* generate(JVMState* jvms);
+ virtual Node* generate_predicate(JVMState* jvms);
vmIntrinsics::ID intrinsic_id() const { return _intrinsic_id; }
};
@@ -83,6 +87,7 @@
int arg_size() const { return callee()->arg_size(); }
bool try_to_inline();
+ Node* try_to_predicate();
// Helper functions to inline natives
void push_result(RegionNode* region, PhiNode* value);
@@ -148,6 +153,7 @@
CallJavaNode* generate_method_call_virtual(vmIntrinsics::ID method_id) {
return generate_method_call(method_id, true, false);
}
+ Node * load_field_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString, bool is_exact, bool is_static);
Node* make_string_method_node(int opcode, Node* str1_start, Node* cnt1, Node* str2_start, Node* cnt2);
Node* make_string_method_node(int opcode, Node* str1, Node* str2);
@@ -253,6 +259,10 @@
bool inline_reverseBytes(vmIntrinsics::ID id);
bool inline_reference_get();
+ bool inline_aescrypt_Block(vmIntrinsics::ID id);
+ bool inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id);
+ Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting);
+ Node* get_key_start_from_aescrypt_object(Node* aescrypt_object);
};
@@ -306,6 +316,8 @@
}
}
+ bool is_predicted = false;
+
switch (id) {
case vmIntrinsics::_compareTo:
if (!SpecialStringCompareTo) return NULL;
@@ -413,6 +425,18 @@
break;
#endif
+ case vmIntrinsics::_aescrypt_encryptBlock:
+ case vmIntrinsics::_aescrypt_decryptBlock:
+ if (!UseAESIntrinsics) return NULL;
+ break;
+
+ case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
+ case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
+ if (!UseAESIntrinsics) return NULL;
+ // these two require the predicated logic
+ is_predicted = true;
+ break;
+
default:
assert(id <= vmIntrinsics::LAST_COMPILER_INLINE, "caller responsibility");
assert(id != vmIntrinsics::_Object_init && id != vmIntrinsics::_invoke, "enum out of order?");
@@ -444,7 +468,7 @@
if (!InlineUnsafeOps) return NULL;
}
- return new LibraryIntrinsic(m, is_virtual, (vmIntrinsics::ID) id);
+ return new LibraryIntrinsic(m, is_virtual, is_predicted, (vmIntrinsics::ID) id);
}
//----------------------register_library_intrinsics-----------------------
@@ -496,6 +520,47 @@
return NULL;
}
+Node* LibraryIntrinsic::generate_predicate(JVMState* jvms) { // returns the slow-path control built by try_to_predicate()
+ LibraryCallKit kit(jvms, this);
+ Compile* C = kit.C;
+ int nodes = C->unique(); // node count before the predicate, used for the log delta below
+#ifndef PRODUCT
+ assert(is_predicted(), "sanity");
+ if ((PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) && Verbose) {
+ char buf[1000];
+ const char* str = vmIntrinsics::short_name_as_C_string(intrinsic_id(), buf, sizeof(buf));
+ tty->print_cr("Predicate for intrinsic %s", str);
+ }
+#endif
+
+ Node* slow_ctl = kit.try_to_predicate();
+ if (!kit.failing()) {
+ if (C->log()) {
+ C->log()->elem("predicate_intrinsic id='%s'%s nodes='%d'",
+ vmIntrinsics::name_at(intrinsic_id()),
+ (is_virtual() ? " virtual='1'" : ""),
+ C->unique() - nodes);
+ }
+ return slow_ctl; // Could be NULL if the check folds.
+ }
+
+ // The predicate generation bailed out: optionally report it, record the failure, and return NULL.
+ if (PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) {
+ if (jvms->has_method()) {
+ // Not a root compile.
+ const char* msg = "failed to generate predicate for intrinsic";
+ CompileTask::print_inlining(kit.callee(), jvms->depth() - 1, kit.bci(), msg);
+ } else {
+ // Root compile
+ tty->print("Did not generate predicate for intrinsic %s%s at bci:%d in",
+ vmIntrinsics::name_at(intrinsic_id()),
+ (is_virtual() ? " (virtual)" : ""), kit.bci());
+ }
+ }
+ C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_failed);
+ return NULL; // same value as a folded check; this path is only taken when kit.failing() is true
+}
+
bool LibraryCallKit::try_to_inline() {
// Handle symbolic names for otherwise undistinguished boolean switches:
const bool is_store = true;
@@ -767,6 +832,14 @@
case vmIntrinsics::_Reference_get:
return inline_reference_get();
+ case vmIntrinsics::_aescrypt_encryptBlock:
+ case vmIntrinsics::_aescrypt_decryptBlock:
+ return inline_aescrypt_Block(intrinsic_id());
+
+ case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
+ case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
+ return inline_cipherBlockChaining_AESCrypt(intrinsic_id());
+
default:
// If you get here, it may be that someone has added a new intrinsic
// to the list in vmSymbols.hpp without implementing it here.
@@ -780,6 +853,36 @@
}
}
+Node* LibraryCallKit::try_to_predicate() { // dispatches on intrinsic_id() to build the predicate; returns slow-path control
+ if (!jvms()->has_method()) {
+ // Root JVMState has a null method.
+ assert(map()->memory()->Opcode() == Op_Parm, "");
+ // Insert the memory aliasing node
+ set_all_memory(reset_memory());
+ }
+ assert(merged_memory(), "");
+
+ switch (intrinsic_id()) {
+ case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
+ return inline_cipherBlockChaining_AESCrypt_predicate(false); // argument is the 'decrypting' flag
+ case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
+ return inline_cipherBlockChaining_AESCrypt_predicate(true);
+
+ default:
+ // If you get here, it may be that someone has added a new intrinsic
+ // to the list in vmSymbols.hpp without implementing it here.
+#ifndef PRODUCT
+ if ((PrintMiscellaneous && (Verbose || WizardMode)) || PrintOpto) {
+ tty->print_cr("*** Warning: Unimplemented predicate for intrinsic %s(%d)",
+ vmIntrinsics::name_at(intrinsic_id()), intrinsic_id());
+ }
+#endif
+ Node* slow_ctl = control(); // hand the current control to the slow path
+ set_control(top()); // No fast path intrinsic
+ return slow_ctl;
+ }
+}
+
//------------------------------push_result------------------------------
// Helper function for finishing intrinsics.
void LibraryCallKit::push_result(RegionNode* region, PhiNode* value) {
@@ -3830,7 +3933,7 @@
vtable_index*vtableEntry::size()) * wordSize +
vtableEntry::method_offset_in_bytes();
Node* entry_addr = basic_plus_adr(obj_klass, entry_offset);
- Node* target_call = make_load(NULL, entry_addr, TypeInstPtr::NOTNULL, T_OBJECT);
+ Node* target_call = make_load(NULL, entry_addr, TypePtr::NOTNULL, T_ADDRESS);
// Compare the target method with the expected method (e.g., Object.hashCode).
const TypePtr* native_call_addr = TypeMetadataPtr::make(method);
@@ -5613,3 +5716,265 @@
push(result);
return true;
}
+
+// Looks up a field of fromObj by name and type signature and builds a load of it; returns NULL if the field is not found.
+Node * LibraryCallKit::load_field_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString,
+ bool is_exact=true, bool is_static=false) {
+
+ const TypeInstPtr* tinst = _gvn.type(fromObj)->isa_instptr();
+ assert(tinst != NULL, "obj is null");
+ assert(tinst->klass()->is_loaded(), "obj is not loaded");
+ assert(!is_exact || tinst->klass_is_exact(), "klass not exact"); // exactness is only required when is_exact is set
+
+ ciField* field = tinst->klass()->as_instance_klass()->get_field_by_name(ciSymbol::make(fieldName),
+ ciSymbol::make(fieldTypeString),
+ is_static);
+ if (field == NULL) return (Node *) NULL; // callers must check for NULL
+ assert (field != NULL, "undefined field"); // cannot fire: the NULL case returned on the previous line
+
+ // Next code copied from Parse::do_get_xxx():
+
+ // Compute address and memory type.
+ int offset = field->offset_in_bytes();
+ bool is_vol = field->is_volatile();
+ ciType* field_klass = field->type();
+ assert(field_klass->is_loaded(), "should be loaded");
+ const TypePtr* adr_type = C->alias_type(field)->adr_type();
+ Node *adr = basic_plus_adr(fromObj, fromObj, offset);
+ BasicType bt = field->layout_type();
+
+ // Build the resultant type of the load
+ const Type *type = TypeOopPtr::make_from_klass(field_klass->as_klass()); // NOTE(review): presumably only valid for object/array fields - confirm
+
+ // Build the load.
+ Node* loadedField = make_load(NULL, adr, type, bt, adr_type, is_vol);
+ return loadedField;
+}
+
+// Replaces AESCrypt encryptBlock/decryptBlock with a leaf call to the matching stub; returns false to fall back.
+//------------------------------inline_aescrypt_Block-----------------------
+bool LibraryCallKit::inline_aescrypt_Block(vmIntrinsics::ID id) {
+ address stubAddr = NULL; // stays NULL (=> return false) if id matches no case below
+ const char *stubName = NULL;
+ assert(UseAES, "need AES instruction support");
+
+ switch(id) {
+ case vmIntrinsics::_aescrypt_encryptBlock:
+ stubAddr = StubRoutines::aescrypt_encryptBlock();
+ stubName = "aescrypt_encryptBlock";
+ break;
+ case vmIntrinsics::_aescrypt_decryptBlock:
+ stubAddr = StubRoutines::aescrypt_decryptBlock();
+ stubName = "aescrypt_decryptBlock";
+ break;
+ }
+ if (stubAddr == NULL) return false; // no stub available: do not intrinsify
+
+ // Restore the stack and pop off the arguments.
+ int nargs = 5; // this + 2 oop/offset combos
+ assert(callee()->signature()->size() == nargs-1, "encryptBlock has 4 arguments");
+
+ Node *aescrypt_object = argument(0);
+ Node *src = argument(1);
+ Node *src_offset = argument(2);
+ Node *dest = argument(3);
+ Node *dest_offset = argument(4);
+
+ // (1) src and dest are arrays.
+ const Type* src_type = src->Value(&_gvn);
+ const Type* dest_type = dest->Value(&_gvn);
+ const TypeAryPtr* top_src = src_type->isa_aryptr();
+ const TypeAryPtr* top_dest = dest_type->isa_aryptr();
+ assert (top_src != NULL && top_src->klass() != NULL && top_dest != NULL && top_dest->klass() != NULL, "args are strange");
+
+ // for the quick and dirty code we will skip all the checks.
+ // we are just trying to get the call to be generated.
+ Node* src_start = src;
+ Node* dest_start = dest;
+ if (src_offset != NULL || dest_offset != NULL) {
+ assert(src_offset != NULL && dest_offset != NULL, "");
+ src_start = array_element_address(src, src_offset, T_BYTE); // address of src[src_offset]
+ dest_start = array_element_address(dest, dest_offset, T_BYTE); // address of dest[dest_offset]
+ }
+
+ // now need to get the start of its expanded key array
+ // this requires a newer class file that has this array as littleEndian ints, otherwise we revert to java
+ Node* k_start = get_key_start_from_aescrypt_object(aescrypt_object);
+ if (k_start == NULL) return false;
+
+ // Call the stub.
+ make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::aescrypt_block_Type(),
+ stubAddr, stubName, TypePtr::BOTTOM,
+ src_start, dest_start, k_start);
+
+ return true;
+}
+// Replaces CipherBlockChaining encrypt/decrypt with a leaf call to the matching stub; returns false to fall back.
+//------------------------------inline_cipherBlockChaining_AESCrypt-----------------------
+bool LibraryCallKit::inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id) {
+ address stubAddr = NULL; // stays NULL (=> return false) if id matches no case below
+ const char *stubName = NULL;
+
+ assert(UseAES, "need AES instruction support");
+
+ switch(id) {
+ case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
+ stubAddr = StubRoutines::cipherBlockChaining_encryptAESCrypt();
+ stubName = "cipherBlockChaining_encryptAESCrypt";
+ break;
+ case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
+ stubAddr = StubRoutines::cipherBlockChaining_decryptAESCrypt();
+ stubName = "cipherBlockChaining_decryptAESCrypt";
+ break;
+ }
+ if (stubAddr == NULL) return false; // no stub available: do not intrinsify
+
+
+ // Restore the stack and pop off the arguments.
+ int nargs = 6; // this + oop/offset + len + oop/offset
+ assert(callee()->signature()->size() == nargs-1, "wrong number of arguments");
+ Node *cipherBlockChaining_object = argument(0);
+ Node *src = argument(1);
+ Node *src_offset = argument(2);
+ Node *len = argument(3);
+ Node *dest = argument(4);
+ Node *dest_offset = argument(5);
+
+ // (1) src and dest are arrays.
+ const Type* src_type = src->Value(&_gvn);
+ const Type* dest_type = dest->Value(&_gvn);
+ const TypeAryPtr* top_src = src_type->isa_aryptr();
+ const TypeAryPtr* top_dest = dest_type->isa_aryptr();
+ assert (top_src != NULL && top_src->klass() != NULL
+ && top_dest != NULL && top_dest->klass() != NULL, "args are strange");
+
+ // checks are the responsibility of the caller
+ Node* src_start = src;
+ Node* dest_start = dest;
+ if (src_offset != NULL || dest_offset != NULL) {
+ assert(src_offset != NULL && dest_offset != NULL, "");
+ src_start = array_element_address(src, src_offset, T_BYTE); // address of src[src_offset]
+ dest_start = array_element_address(dest, dest_offset, T_BYTE); // address of dest[dest_offset]
+ }
+
+ // if we are in this set of code, we "know" the embeddedCipher is an AESCrypt object
+ // (because of the predicated logic executed earlier).
+ // so we cast it here safely.
+ // this requires a newer class file that has this array as littleEndian ints, otherwise we revert to java
+
+ Node* embeddedCipherObj = load_field_from_object(cipherBlockChaining_object, "embeddedCipher", "Lcom/sun/crypto/provider/SymmetricCipher;", /*is_exact*/ false);
+ if (embeddedCipherObj == NULL) return false; // field not found: fall back
+
+ // cast it to what we know it will be at runtime
+ const TypeInstPtr* tinst = _gvn.type(cipherBlockChaining_object)->isa_instptr();
+ assert(tinst != NULL, "CBC obj is null");
+ assert(tinst->klass()->is_loaded(), "CBC obj is not loaded");
+ ciKlass* klass_AESCrypt = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make("com/sun/crypto/provider/AESCrypt"));
+ if (!klass_AESCrypt->is_loaded()) return false; // AESCrypt not loaded: fall back
+
+ ciInstanceKlass* instklass_AESCrypt = klass_AESCrypt->as_instance_klass();
+ const TypeKlassPtr* aklass = TypeKlassPtr::make(instklass_AESCrypt);
+ const TypeOopPtr* xtype = aklass->as_instance_type();
+ Node* aescrypt_object = new(C) CheckCastPPNode(control(), embeddedCipherObj, xtype);
+ aescrypt_object = _gvn.transform(aescrypt_object);
+
+ // we need to get the start of the aescrypt_object's expanded key array
+ Node* k_start = get_key_start_from_aescrypt_object(aescrypt_object);
+ if (k_start == NULL) return false;
+
+ // similarly, get the start address of the r vector
+ Node* objRvec = load_field_from_object(cipherBlockChaining_object, "r", "[B", /*is_exact*/ false);
+ if (objRvec == NULL) return false;
+ Node* r_start = array_element_address(objRvec, intcon(0), T_BYTE);
+
+ // Call the stub, passing src_start, dest_start, k_start, r_start and src_len
+ make_runtime_call(RC_LEAF|RC_NO_FP,
+ OptoRuntime::cipherBlockChaining_aescrypt_Type(),
+ stubAddr, stubName, TypePtr::BOTTOM,
+ src_start, dest_start, k_start, r_start, len);
+
+ // return is void so no result needs to be pushed
+
+ return true;
+}
+// Loads the int[] field "K" of the given AESCrypt object and returns the address of its element 0, or NULL.
+//------------------------------get_key_start_from_aescrypt_object-----------------------
+Node * LibraryCallKit::get_key_start_from_aescrypt_object(Node *aescrypt_object) {
+ Node* objAESCryptKey = load_field_from_object(aescrypt_object, "K", "[I", /*is_exact*/ false);
+ assert (objAESCryptKey != NULL, "wrong version of com.sun.crypto.provider.AESCrypt");
+ if (objAESCryptKey == NULL) return (Node *) NULL; // reached only when the assert above is compiled out
+
+ // now have the array, need to get the start address of the K array
+ Node* k_start = array_element_address(objAESCryptKey, intcon(0), T_INT);
+ return k_start;
+}
+
+//----------------------------inline_cipherBlockChaining_AESCrypt_predicate----------------------------
+// Return node representing slow path of predicate check.
+// the pseudo code we want to emulate with this predicate is:
+// for encryption:
+// if (embeddedCipherObj instanceof AESCrypt) do_intrinsic, else do_javapath
+// for decryption:
+// if ((embeddedCipherObj instanceof AESCrypt) && (cipher!=plain)) do_intrinsic, else do_javapath
+// note cipher==plain is more conservative than the original java code but that's OK
+//
+Node* LibraryCallKit::inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting) {
+ // First, check receiver for NULL since it is virtual method.
+ int nargs = arg_size();
+ Node* objCBC = argument(0);
+ _sp += nargs; // temporarily adjust _sp around the null check, restored right after
+ objCBC = do_null_check(objCBC, T_OBJECT);
+ _sp -= nargs;
+
+ if (stopped()) return NULL; // Always NULL
+
+ // Load embeddedCipher field of CipherBlockChaining object.
+ Node* embeddedCipherObj = load_field_from_object(objCBC, "embeddedCipher", "Lcom/sun/crypto/provider/SymmetricCipher;", /*is_exact*/ false); // NOTE(review): result is not NULL-checked here - confirm the field always exists
+
+ // get AESCrypt klass for instanceOf check
+ // AESCrypt might not be loaded yet if some other SymmetricCipher got us to this compile point
+ // will have same classloader as CipherBlockChaining object
+ const TypeInstPtr* tinst = _gvn.type(objCBC)->isa_instptr();
+ assert(tinst != NULL, "CBCobj is null");
+ assert(tinst->klass()->is_loaded(), "CBCobj is not loaded");
+
+ // we want to do an instanceof comparison against the AESCrypt class
+ ciKlass* klass_AESCrypt = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make("com/sun/crypto/provider/AESCrypt"));
+ if (!klass_AESCrypt->is_loaded()) {
+ // if AESCrypt is not even loaded, we never take the intrinsic fast path
+ Node* ctrl = control();
+ set_control(top()); // no regular fast path
+ return ctrl; // all control goes to the slow path
+ }
+ ciInstanceKlass* instklass_AESCrypt = klass_AESCrypt->as_instance_klass();
+
+ _sp += nargs; // gen_instanceof might do an uncommon trap
+ Node* instof = gen_instanceof(embeddedCipherObj, makecon(TypeKlassPtr::make(instklass_AESCrypt)));
+ _sp -= nargs;
+ Node* cmp_instof = _gvn.transform(new (C) CmpINode(instof, intcon(1)));
+ Node* bool_instof = _gvn.transform(new (C) BoolNode(cmp_instof, BoolTest::ne)); // true when instof != 1
+
+ Node* instof_false = generate_guard(bool_instof, NULL, PROB_MIN); // slow-path control for the failed instanceof
+
+ // for encryption, we are done
+ if (!decrypting)
+ return instof_false; // even if it is NULL
+
+ // for decryption, we need to add a further check to avoid
+ // taking the intrinsic path when cipher and plain are the same
+ // see the original java code for why.
+ RegionNode* region = new(C) RegionNode(3); // merges the two slow-path exits
+ region->init_req(1, instof_false);
+ Node* src = argument(1);
+ Node *dest = argument(4);
+ Node* cmp_src_dest = _gvn.transform(new (C) CmpPNode(src, dest));
+ Node* bool_src_dest = _gvn.transform(new (C) BoolNode(cmp_src_dest, BoolTest::eq)); // true when src and dest are the same array
+ Node* src_dest_conjoint = generate_guard(bool_src_dest, NULL, PROB_MIN);
+ region->init_req(2, src_dest_conjoint);
+
+ record_for_igvn(region);
+ return _gvn.transform(region);
+
+}
+
+
--- a/hotspot/src/share/vm/opto/mulnode.cpp Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/src/share/vm/opto/mulnode.cpp Fri Oct 26 12:06:55 2012 -0700
@@ -479,24 +479,27 @@
return new (phase->C) AndINode(load,phase->intcon(mask&0xFFFF));
// Masking bits off of a Short? Loading a Character does some masking
- if (lop == Op_LoadS && (mask & 0xFFFF0000) == 0 ) {
- Node *ldus = new (phase->C) LoadUSNode(load->in(MemNode::Control),
- load->in(MemNode::Memory),
- load->in(MemNode::Address),
- load->adr_type());
- ldus = phase->transform(ldus);
- return new (phase->C) AndINode(ldus, phase->intcon(mask & 0xFFFF));
- }
+ if (can_reshape &&
+ load->outcnt() == 1 && load->unique_out() == this) {
+ if (lop == Op_LoadS && (mask & 0xFFFF0000) == 0 ) {
+ Node *ldus = new (phase->C) LoadUSNode(load->in(MemNode::Control),
+ load->in(MemNode::Memory),
+ load->in(MemNode::Address),
+ load->adr_type());
+ ldus = phase->transform(ldus);
+ return new (phase->C) AndINode(ldus, phase->intcon(mask & 0xFFFF));
+ }
- // Masking sign bits off of a Byte? Do an unsigned byte load plus
- // an and.
- if (lop == Op_LoadB && (mask & 0xFFFFFF00) == 0) {
- Node* ldub = new (phase->C) LoadUBNode(load->in(MemNode::Control),
- load->in(MemNode::Memory),
- load->in(MemNode::Address),
- load->adr_type());
- ldub = phase->transform(ldub);
- return new (phase->C) AndINode(ldub, phase->intcon(mask));
+ // Masking sign bits off of a Byte? Do an unsigned byte load plus
+ // an and.
+ if (lop == Op_LoadB && (mask & 0xFFFFFF00) == 0) {
+ Node* ldub = new (phase->C) LoadUBNode(load->in(MemNode::Control),
+ load->in(MemNode::Memory),
+ load->in(MemNode::Address),
+ load->adr_type());
+ ldub = phase->transform(ldub);
+ return new (phase->C) AndINode(ldub, phase->intcon(mask));
+ }
}
// Masking off sign bits? Dont make them!
@@ -923,7 +926,9 @@
set_req(2, phase->intcon(0));
return this;
}
- else if( ld->Opcode() == Op_LoadUS )
+ else if( can_reshape &&
+ ld->Opcode() == Op_LoadUS &&
+ ld->outcnt() == 1 && ld->unique_out() == shl)
// Replace zero-extension-load with sign-extension-load
return new (phase->C) LoadSNode( ld->in(MemNode::Control),
ld->in(MemNode::Memory),
--- a/hotspot/src/share/vm/opto/runtime.cpp Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/src/share/vm/opto/runtime.cpp Fri Oct 26 12:06:55 2012 -0700
@@ -811,6 +811,48 @@
return TypeFunc::make(domain, range);
}
+// Builds the TypeFunc for the aescrypt encrypt/decrypt block operations: three non-null pointer arguments, void result (no length argument because the length is constant)
+const TypeFunc* OptoRuntime::aescrypt_block_Type() {
+ // create input type (domain): one slot per argument, filled starting at TypeFunc::Parms
+ int num_args = 3;
+ int argcnt = num_args;
+ const Type** fields = TypeTuple::fields(argcnt);
+ int argp = TypeFunc::Parms;
+ fields[argp++] = TypePtr::NOTNULL; // src
+ fields[argp++] = TypePtr::NOTNULL; // dest
+ fields[argp++] = TypePtr::NOTNULL; // k array
+ assert(argp == TypeFunc::Parms+argcnt, "correct decoding"); // every declared argument slot was filled
+ const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
+
+ // void result: the range tuple is made with TypeFunc::Parms entries only, i.e. no return-value slot
+ fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms+0] = NULL; // void
+ const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
+ return TypeFunc::make(domain, range);
+}
+
+// Builds the TypeFunc for the cipherBlockChaining aescrypt encrypt/decrypt calls: four non-null pointer arguments plus an int length, void result
+const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() {
+ // create input type (domain): one slot per argument, filled starting at TypeFunc::Parms
+ int num_args = 5;
+ int argcnt = num_args;
+ const Type** fields = TypeTuple::fields(argcnt);
+ int argp = TypeFunc::Parms;
+ fields[argp++] = TypePtr::NOTNULL; // src
+ fields[argp++] = TypePtr::NOTNULL; // dest
+ fields[argp++] = TypePtr::NOTNULL; // k array
+ fields[argp++] = TypePtr::NOTNULL; // r array
+ fields[argp++] = TypeInt::INT; // src len
+ assert(argp == TypeFunc::Parms+argcnt, "correct decoding"); // every declared argument slot was filled
+ const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
+
+ // void result: the range tuple is made with TypeFunc::Parms entries only, i.e. no return-value slot
+ fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms+0] = NULL; // void
+ const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
+ return TypeFunc::make(domain, range);
+}
+
//------------- Interpreter state access for on stack replacement
const TypeFunc* OptoRuntime::osr_end_Type() {
// create input type (domain)
--- a/hotspot/src/share/vm/opto/runtime.hpp Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/src/share/vm/opto/runtime.hpp Fri Oct 26 12:06:55 2012 -0700
@@ -280,6 +280,9 @@
static const TypeFunc* array_fill_Type();
+ static const TypeFunc* aescrypt_block_Type();
+ static const TypeFunc* cipherBlockChaining_aescrypt_Type();
+
// leaf on stack replacement interpreter accessor types
static const TypeFunc* osr_end_Type();
--- a/hotspot/src/share/vm/opto/superword.cpp Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/src/share/vm/opto/superword.cpp Fri Oct 26 12:06:55 2012 -0700
@@ -1776,16 +1776,15 @@
set_velt_type(n, container_type(n));
}
- // Propagate narrowed type backwards through operations
+ // Propagate integer narrowed type backwards through operations
// that don't depend on higher order bits
for (int i = _block.length() - 1; i >= 0; i--) {
Node* n = _block.at(i);
// Only integer types need be examined
- const Type* vt = velt_type(n);
- if (vt->basic_type() == T_INT) {
+ const Type* vtn = velt_type(n);
+ if (vtn->basic_type() == T_INT) {
uint start, end;
VectorNode::vector_operands(n, &start, &end);
- const Type* vt = velt_type(n);
for (uint j = start; j < end; j++) {
Node* in = n->in(j);
@@ -1801,6 +1800,24 @@
}
}
if (same_type) {
+ // For right shifts of small integer types (bool, byte, char, short)
+ // we need precise information about signedness. Only Load nodes have
+ // this information because Store nodes are the same for signed and
+ // unsigned values. Any arithmetic operation after a load may expand
+ // the value to a signed Int, so such right shifts can't be used
+ // because vector elements do not have the upper bits of an Int.
+ const Type* vt = vtn;
+ if (VectorNode::is_shift(in)) {
+ Node* load = in->in(1);
+ if (load->is_Load() && in_bb(load) && (velt_type(load)->basic_type() == T_INT)) {
+ vt = velt_type(load);
+ } else if (in->Opcode() != Op_LShiftI) {
+ // Widen type to Int to avoid creation of a right shift vector
+ // (align + data_size(s1) check in stmts_can_pack() will fail).
+ // Note that left shifts work regardless of type.
+ vt = TypeInt::INT;
+ }
+ }
set_velt_type(in, vt);
}
}
@@ -1841,7 +1858,20 @@
// Smallest type containing range of values
const Type* SuperWord::container_type(Node* n) {
if (n->is_Mem()) {
- return Type::get_const_basic_type(n->as_Mem()->memory_type());
+ BasicType bt = n->as_Mem()->memory_type();
+ if (n->is_Store() && (bt == T_CHAR)) {
+ // Use T_SHORT type instead of T_CHAR for stored values because any
+ // preceding arithmetic operation extends values to signed Int.
+ bt = T_SHORT;
+ }
+ if (n->Opcode() == Op_LoadUB) {
+ // Adjust type for unsigned byte loads; this is important for right shifts.
+ // T_BOOLEAN is used because there is no basic type representing type
+ // TypeInt::UBYTE. Use of T_BOOLEAN for vectors is fine because only
+ // size (one byte) and sign are important.
+ bt = T_BOOLEAN;
+ }
+ return Type::get_const_basic_type(bt);
}
const Type* t = _igvn.type(n);
if (t->basic_type() == T_INT) {
--- a/hotspot/src/share/vm/opto/type.cpp Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/src/share/vm/opto/type.cpp Fri Oct 26 12:06:55 2012 -0700
@@ -61,7 +61,7 @@
{ Bad, T_ILLEGAL, "tuple:", false, Node::NotAMachineReg, relocInfo::none }, // Tuple
{ Bad, T_ARRAY, "array:", false, Node::NotAMachineReg, relocInfo::none }, // Array
-#if defined(IA32) || defined(AMD64)
+#ifndef SPARC
{ Bad, T_ILLEGAL, "vectors:", false, Op_VecS, relocInfo::none }, // VectorS
{ Bad, T_ILLEGAL, "vectord:", false, Op_VecD, relocInfo::none }, // VectorD
{ Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX
--- a/hotspot/src/share/vm/opto/vectornode.cpp Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/src/share/vm/opto/vectornode.cpp Fri Oct 26 12:06:55 2012 -0700
@@ -29,8 +29,7 @@
//------------------------------VectorNode--------------------------------------
// Return the vector operator for the specified scalar operation
-// and vector length. Also used to check if the code generator
-// supports the vector operation.
+// and vector length.
int VectorNode::opcode(int sopc, BasicType bt) {
switch (sopc) {
case Op_AddI:
@@ -75,7 +74,7 @@
case T_BYTE: return 0; // Unimplemented
case T_CHAR:
case T_SHORT: return Op_MulVS;
- case T_INT: return Matcher::match_rule_supported(Op_MulVI) ? Op_MulVI : 0; // SSE4_1
+ case T_INT: return Op_MulVI;
}
ShouldNotReachHere();
case Op_MulF:
@@ -104,9 +103,9 @@
return Op_LShiftVL;
case Op_RShiftI:
switch (bt) {
- case T_BOOLEAN:
+ case T_BOOLEAN:return Op_URShiftVB; // boolean is unsigned value
+ case T_CHAR: return Op_URShiftVS; // char is unsigned value
case T_BYTE: return Op_RShiftVB;
- case T_CHAR:
case T_SHORT: return Op_RShiftVS;
case T_INT: return Op_RShiftVI;
}
@@ -116,10 +115,14 @@
return Op_RShiftVL;
case Op_URShiftI:
switch (bt) {
- case T_BOOLEAN:
- case T_BYTE: return Op_URShiftVB;
- case T_CHAR:
- case T_SHORT: return Op_URShiftVS;
+ case T_BOOLEAN:return Op_URShiftVB;
+ case T_CHAR: return Op_URShiftVS;
+ case T_BYTE:
+ case T_SHORT: return 0; // Vector logical right shift for signed short
+ // values produces an incorrect Java result for
+ // negative data because Java code converts
+ // a short value into an int value with sign
+ // extension before a shift.
case T_INT: return Op_URShiftVI;
}
ShouldNotReachHere();
@@ -157,12 +160,14 @@
return 0; // Unimplemented
}
+// Returns true if a vector opcode exists for the scalar operation and the
+// code generator supports it for the given vector length and element type.
bool VectorNode::implemented(int opc, uint vlen, BasicType bt) {
if (is_java_primitive(bt) &&
(vlen > 1) && is_power_of_2(vlen) &&
Matcher::vector_size_supported(bt, vlen)) {
int vopc = VectorNode::opcode(opc, bt);
- return vopc > 0 && Matcher::has_match_rule(vopc);
+ return vopc > 0 && Matcher::match_rule_supported(vopc);
}
return false;
}
--- a/hotspot/src/share/vm/prims/unsafe.cpp Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/src/share/vm/prims/unsafe.cpp Fri Oct 26 12:06:55 2012 -0700
@@ -124,6 +124,8 @@
assert((void*)p->obj_field_addr<oop>((jint)byte_offset) == ptr_plus_disp,
"raw [ptr+disp] must be consistent with oop::field_base");
}
+ jlong p_size = HeapWordSize * (jlong)(p->size());
+ assert(byte_offset < p_size, err_msg("Unsafe access: offset " INT64_FORMAT " > object's size " INT64_FORMAT, byte_offset, p_size));
}
#endif
if (sizeof(char*) == sizeof(jint)) // (this constant folds!)
--- a/hotspot/src/share/vm/runtime/globals.hpp Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/src/share/vm/runtime/globals.hpp Fri Oct 26 12:06:55 2012 -0700
@@ -533,6 +533,9 @@
product(intx, UseSSE, 99, \
"Highest supported SSE instructions set on x86/x64") \
\
+ product(bool, UseAES, false, \
+ "Control whether AES instructions can be used on x86/x64") \
+ \
product(uintx, LargePageSizeInBytes, 0, \
"Large page size (0 to let VM choose the page size") \
\
@@ -635,6 +638,9 @@
product(bool, UseSSE42Intrinsics, false, \
"SSE4.2 versions of intrinsics") \
\
+ product(bool, UseAESIntrinsics, false, \
+ "use intrinsics for AES versions of crypto") \
+ \
develop(bool, TraceCallFixup, false, \
"traces all call fixups") \
\
--- a/hotspot/src/share/vm/runtime/stubRoutines.cpp Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/src/share/vm/runtime/stubRoutines.cpp Fri Oct 26 12:06:55 2012 -0700
@@ -120,6 +120,10 @@
address StubRoutines::_arrayof_jshort_fill;
address StubRoutines::_arrayof_jint_fill;
+address StubRoutines::_aescrypt_encryptBlock = NULL;
+address StubRoutines::_aescrypt_decryptBlock = NULL;
+address StubRoutines::_cipherBlockChaining_encryptAESCrypt = NULL;
+address StubRoutines::_cipherBlockChaining_decryptAESCrypt = NULL;
double (* StubRoutines::_intrinsic_log )(double) = NULL;
double (* StubRoutines::_intrinsic_log10 )(double) = NULL;
--- a/hotspot/src/share/vm/runtime/stubRoutines.hpp Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/src/share/vm/runtime/stubRoutines.hpp Fri Oct 26 12:06:55 2012 -0700
@@ -199,6 +199,11 @@
// zero heap space aligned to jlong (8 bytes)
static address _zero_aligned_words;
+ static address _aescrypt_encryptBlock;
+ static address _aescrypt_decryptBlock;
+ static address _cipherBlockChaining_encryptAESCrypt;
+ static address _cipherBlockChaining_decryptAESCrypt;
+
// These are versions of the java.lang.Math methods which perform
// the same operations as the intrinsic version. They are used for
// constant folding in the compiler to ensure equivalence. If the
@@ -330,6 +335,11 @@
static address arrayof_jshort_fill() { return _arrayof_jshort_fill; }
static address arrayof_jint_fill() { return _arrayof_jint_fill; }
+ static address aescrypt_encryptBlock() { return _aescrypt_encryptBlock; }
+ static address aescrypt_decryptBlock() { return _aescrypt_decryptBlock; }
+ static address cipherBlockChaining_encryptAESCrypt() { return _cipherBlockChaining_encryptAESCrypt; }
+ static address cipherBlockChaining_decryptAESCrypt() { return _cipherBlockChaining_decryptAESCrypt; }
+
static address select_fill_function(BasicType t, bool aligned, const char* &name);
static address zero_aligned_words() { return _zero_aligned_words; }
--- a/hotspot/test/compiler/6340864/TestByteVect.java Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/test/compiler/6340864/TestByteVect.java Fri Oct 26 12:06:55 2012 -0700
@@ -33,7 +33,7 @@
public class TestByteVect {
private static final int ARRLEN = 997;
private static final int ITERS = 11000;
- private static final int ADD_INIT = 0;
+ private static final int ADD_INIT = 63;
private static final int BIT_MASK = 0xB7;
private static final int VALUE = 3;
private static final int SHIFT = 8;
@@ -76,6 +76,7 @@
test_subc(a0, a1);
test_subv(a0, a1, (byte)VALUE);
test_suba(a0, a1, a2);
+
test_mulc(a0, a1);
test_mulv(a0, a1, (byte)VALUE);
test_mula(a0, a1, a2);
@@ -88,6 +89,7 @@
test_divc_n(a0, a1);
test_divv(a0, a1, (byte)-VALUE);
test_diva(a0, a1, a3);
+
test_andc(a0, a1);
test_andv(a0, a1, (byte)BIT_MASK);
test_anda(a0, a1, a4);
@@ -97,30 +99,49 @@
test_xorc(a0, a1);
test_xorv(a0, a1, (byte)BIT_MASK);
test_xora(a0, a1, a4);
+
test_sllc(a0, a1);
test_sllv(a0, a1, VALUE);
test_srlc(a0, a1);
test_srlv(a0, a1, VALUE);
test_srac(a0, a1);
test_srav(a0, a1, VALUE);
+
test_sllc_n(a0, a1);
test_sllv(a0, a1, -VALUE);
test_srlc_n(a0, a1);
test_srlv(a0, a1, -VALUE);
test_srac_n(a0, a1);
test_srav(a0, a1, -VALUE);
+
test_sllc_o(a0, a1);
test_sllv(a0, a1, SHIFT);
test_srlc_o(a0, a1);
test_srlv(a0, a1, SHIFT);
test_srac_o(a0, a1);
test_srav(a0, a1, SHIFT);
+
test_sllc_on(a0, a1);
test_sllv(a0, a1, -SHIFT);
test_srlc_on(a0, a1);
test_srlv(a0, a1, -SHIFT);
test_srac_on(a0, a1);
test_srav(a0, a1, -SHIFT);
+
+ test_sllc_add(a0, a1);
+ test_sllv_add(a0, a1, ADD_INIT);
+ test_srlc_add(a0, a1);
+ test_srlv_add(a0, a1, ADD_INIT);
+ test_srac_add(a0, a1);
+ test_srav_add(a0, a1, ADD_INIT);
+
+ test_sllc_and(a0, a1);
+ test_sllv_and(a0, a1, BIT_MASK);
+ test_srlc_and(a0, a1);
+ test_srlv_and(a0, a1, BIT_MASK);
+ test_srac_and(a0, a1);
+ test_srav_and(a0, a1, BIT_MASK);
+
test_pack2(p2, a1);
test_unpack2(a0, p2);
test_pack2_swap(p2, a1);
@@ -369,6 +390,60 @@
errn += verify("test_srav_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-SHIFT)));
}
+ test_sllc_add(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllc_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+ }
+ test_sllv_add(a0, a1, ADD_INIT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllv_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+ }
+
+ test_srlc_add(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlc_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+ }
+ test_srlv_add(a0, a1, ADD_INIT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlv_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+ }
+
+ test_srac_add(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srac_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+ }
+ test_srav_add(a0, a1, ADD_INIT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srav_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+ }
+
+ test_sllc_and(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllc_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+ }
+ test_sllv_and(a0, a1, BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllv_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+ }
+
+ test_srlc_and(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlc_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+ }
+ test_srlv_and(a0, a1, BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlv_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+ }
+
+ test_srac_and(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srac_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+ }
+ test_srav_and(a0, a1, BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srav_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+ }
+
test_pack2(p2, a1);
for (int i=0; i<ARRLEN/2; i++) {
errn += verify("test_pack2: ", i, p2[i], (short)(((short)(ADD_INIT+2*i) & 0xFF) | ((short)(ADD_INIT+2*i+1) << 8)));
@@ -805,6 +880,84 @@
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
+ test_sllc_add(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllc_add: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllv_add(a0, a1, ADD_INIT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllv_add: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlc_add(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlc_add: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlv_add(a0, a1, ADD_INIT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlv_add: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srac_add(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srac_add: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srav_add(a0, a1, ADD_INIT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srav_add: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllc_and(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllc_and: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllv_and(a0, a1, BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllv_and: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlc_and(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlc_and: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlv_and(a0, a1, BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlv_and: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srac_and(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srac_and: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srav_and(a0, a1, BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srav_and: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
test_pack2(p2, a1);
}
end = System.currentTimeMillis();
@@ -1036,6 +1189,26 @@
a0[i] = (byte)(a1[i]<<b);
}
}
+ static void test_sllc_add(byte[] a0, byte[] a1) { // kernel: (a1[i] + ADD_INIT) << VALUE, addend is the class constant
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (byte)((a1[i] + ADD_INIT)<<VALUE); // arithmetic is done in int; the (byte) cast keeps the low 8 bits
+ }
+ }
+ static void test_sllv_add(byte[] a0, byte[] a1, int b) { // kernel: (a1[i] + b) << VALUE, addend b is a runtime parameter
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (byte)((a1[i] + b)<<VALUE); // arithmetic is done in int; the (byte) cast keeps the low 8 bits
+ }
+ }
+ static void test_sllc_and(byte[] a0, byte[] a1) { // kernel: (a1[i] & BIT_MASK) << VALUE, mask is the class constant
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (byte)((a1[i] & BIT_MASK)<<VALUE); // arithmetic is done in int; the (byte) cast keeps the low 8 bits
+ }
+ }
+ static void test_sllv_and(byte[] a0, byte[] a1, int b) { // kernel: (a1[i] & b) << VALUE, mask b is a runtime parameter
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (byte)((a1[i] & b)<<VALUE); // arithmetic is done in int; the (byte) cast keeps the low 8 bits
+ }
+ }
static void test_srlc(byte[] a0, byte[] a1) {
for (int i = 0; i < a0.length; i+=1) {
@@ -1062,6 +1235,26 @@
a0[i] = (byte)(a1[i]>>>b);
}
}
+ static void test_srlc_add(byte[] a0, byte[] a1) { // kernel: (a1[i] + ADD_INIT) >>> VALUE, addend is the class constant
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (byte)((a1[i] + ADD_INIT)>>>VALUE); // operand is an int (a1[i] is sign-extended), so >>> zero-fills from bit 31, not bit 7
+ }
+ }
+ static void test_srlv_add(byte[] a0, byte[] a1, int b) { // kernel: (a1[i] + b) >>> VALUE, addend b is a runtime parameter
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (byte)((a1[i] + b)>>>VALUE); // operand is an int (a1[i] is sign-extended), so >>> zero-fills from bit 31, not bit 7
+ }
+ }
+ static void test_srlc_and(byte[] a0, byte[] a1) { // kernel: (a1[i] & BIT_MASK) >>> VALUE, mask is the class constant
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (byte)((a1[i] & BIT_MASK)>>>VALUE); // the masked operand is an int, so >>> zero-fills from bit 31, not bit 7
+ }
+ }
+ static void test_srlv_and(byte[] a0, byte[] a1, int b) { // kernel: (a1[i] & b) >>> VALUE, mask b is a runtime parameter
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (byte)((a1[i] & b)>>>VALUE); // the masked operand is an int, so >>> zero-fills from bit 31, not bit 7
+ }
+ }
static void test_srac(byte[] a0, byte[] a1) {
for (int i = 0; i < a0.length; i+=1) {
@@ -1088,6 +1281,26 @@
a0[i] = (byte)(a1[i]>>b);
}
}
+ static void test_srac_add(byte[] a0, byte[] a1) { // kernel: (a1[i] + ADD_INIT) >> VALUE, addend is the class constant
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (byte)((a1[i] + ADD_INIT)>>VALUE); // operand is an int (a1[i] is sign-extended), so >> replicates the int sign bit
+ }
+ }
+ static void test_srav_add(byte[] a0, byte[] a1, int b) { // kernel: (a1[i] + b) >> VALUE, addend b is a runtime parameter
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (byte)((a1[i] + b)>>VALUE); // operand is an int (a1[i] is sign-extended), so >> replicates the int sign bit
+ }
+ }
+ static void test_srac_and(byte[] a0, byte[] a1) { // kernel: (a1[i] & BIT_MASK) >> VALUE, mask is the class constant
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (byte)((a1[i] & BIT_MASK)>>VALUE); // the masked operand is an int, so >> replicates the int sign bit
+ }
+ }
+ static void test_srav_and(byte[] a0, byte[] a1, int b) { // kernel: (a1[i] & b) >> VALUE, mask b is a runtime parameter
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (byte)((a1[i] & b)>>VALUE); // the masked operand is an int, so >> replicates the int sign bit
+ }
+ }
static void test_pack2(short[] p2, byte[] a1) {
if (p2.length*2 > a1.length) return;
--- a/hotspot/test/compiler/6340864/TestIntVect.java Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/test/compiler/6340864/TestIntVect.java Fri Oct 26 12:06:55 2012 -0700
@@ -74,6 +74,7 @@
test_subc(a0, a1);
test_subv(a0, a1, (int)VALUE);
test_suba(a0, a1, a2);
+
test_mulc(a0, a1);
test_mulv(a0, a1, (int)VALUE);
test_mula(a0, a1, a2);
@@ -86,6 +87,7 @@
test_divc_n(a0, a1);
test_divv(a0, a1, (int)-VALUE);
test_diva(a0, a1, a3);
+
test_andc(a0, a1);
test_andv(a0, a1, (int)BIT_MASK);
test_anda(a0, a1, a4);
@@ -95,30 +97,49 @@
test_xorc(a0, a1);
test_xorv(a0, a1, (int)BIT_MASK);
test_xora(a0, a1, a4);
+
test_sllc(a0, a1);
test_sllv(a0, a1, VALUE);
test_srlc(a0, a1);
test_srlv(a0, a1, VALUE);
test_srac(a0, a1);
test_srav(a0, a1, VALUE);
+
test_sllc_n(a0, a1);
test_sllv(a0, a1, -VALUE);
test_srlc_n(a0, a1);
test_srlv(a0, a1, -VALUE);
test_srac_n(a0, a1);
test_srav(a0, a1, -VALUE);
+
test_sllc_o(a0, a1);
test_sllv(a0, a1, SHIFT);
test_srlc_o(a0, a1);
test_srlv(a0, a1, SHIFT);
test_srac_o(a0, a1);
test_srav(a0, a1, SHIFT);
+
test_sllc_on(a0, a1);
test_sllv(a0, a1, -SHIFT);
test_srlc_on(a0, a1);
test_srlv(a0, a1, -SHIFT);
test_srac_on(a0, a1);
test_srav(a0, a1, -SHIFT);
+
+ test_sllc_add(a0, a1);
+ test_sllv_add(a0, a1, ADD_INIT);
+ test_srlc_add(a0, a1);
+ test_srlv_add(a0, a1, ADD_INIT);
+ test_srac_add(a0, a1);
+ test_srav_add(a0, a1, ADD_INIT);
+
+ test_sllc_and(a0, a1);
+ test_sllv_and(a0, a1, BIT_MASK);
+ test_srlc_and(a0, a1);
+ test_srlv_and(a0, a1, BIT_MASK);
+ test_srac_and(a0, a1);
+ test_srav_and(a0, a1, BIT_MASK);
+
test_pack2(p2, a1);
test_unpack2(a0, p2);
test_pack2_swap(p2, a1);
@@ -359,6 +380,60 @@
errn += verify("test_srav_on: ", i, a0[i], (int)((int)(ADD_INIT+i)>>(-SHIFT)));
}
+ test_sllc_add(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllc_add: ", i, a0[i], (int)(((int)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+ }
+ test_sllv_add(a0, a1, ADD_INIT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllv_add: ", i, a0[i], (int)(((int)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+ }
+
+ test_srlc_add(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlc_add: ", i, a0[i], (int)(((int)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+ }
+ test_srlv_add(a0, a1, ADD_INIT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlv_add: ", i, a0[i], (int)(((int)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+ }
+
+ test_srac_add(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srac_add: ", i, a0[i], (int)(((int)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+ }
+ test_srav_add(a0, a1, ADD_INIT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srav_add: ", i, a0[i], (int)(((int)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+ }
+
+ test_sllc_and(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllc_and: ", i, a0[i], (int)(((int)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+ }
+ test_sllv_and(a0, a1, BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllv_and: ", i, a0[i], (int)(((int)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+ }
+
+ test_srlc_and(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlc_and: ", i, a0[i], (int)(((int)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+ }
+ test_srlv_and(a0, a1, BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlv_and: ", i, a0[i], (int)(((int)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+ }
+
+ test_srac_and(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srac_and: ", i, a0[i], (int)(((int)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+ }
+ test_srav_and(a0, a1, BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srav_and: ", i, a0[i], (int)(((int)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+ }
+
test_pack2(p2, a1);
for (int i=0; i<ARRLEN/2; i++) {
errn += verify("test_pack2: ", i, p2[i], ((long)(ADD_INIT+2*i) & 0xFFFFFFFFl) | ((long)(ADD_INIT+2*i+1) << 32));
@@ -727,6 +802,84 @@
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
+ test_sllc_add(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllc_add: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllv_add(a0, a1, ADD_INIT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllv_add: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlc_add(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlc_add: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlv_add(a0, a1, ADD_INIT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlv_add: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srac_add(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srac_add: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srav_add(a0, a1, ADD_INIT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srav_add: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllc_and(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllc_and: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllv_and(a0, a1, BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllv_and: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlc_and(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlc_and: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlv_and(a0, a1, BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlv_and: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srac_and(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srac_and: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srav_and(a0, a1, BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srav_and: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
test_pack2(p2, a1);
}
end = System.currentTimeMillis();
@@ -908,6 +1061,26 @@
a0[i] = (int)(a1[i]<<b);
}
}
+ static void test_sllc_add(int[] a0, int[] a1) { // kernel: (a1[i] + ADD_INIT) << VALUE, addend is the class constant
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (int)((a1[i] + ADD_INIT)<<VALUE); // the (int) cast is a no-op: the expression is already int
+ }
+ }
+ static void test_sllv_add(int[] a0, int[] a1, int b) { // kernel: (a1[i] + b) << VALUE, addend b is a runtime parameter
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (int)((a1[i] + b)<<VALUE); // the (int) cast is a no-op: the expression is already int
+ }
+ }
+ static void test_sllc_and(int[] a0, int[] a1) { // kernel: (a1[i] & BIT_MASK) << VALUE, mask is the class constant
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (int)((a1[i] & BIT_MASK)<<VALUE); // the (int) cast is a no-op: the expression is already int
+ }
+ }
+ static void test_sllv_and(int[] a0, int[] a1, int b) { // kernel: (a1[i] & b) << VALUE, mask b is a runtime parameter
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (int)((a1[i] & b)<<VALUE); // the (int) cast is a no-op: the expression is already int
+ }
+ }
static void test_srlc(int[] a0, int[] a1) {
for (int i = 0; i < a0.length; i+=1) {
@@ -934,6 +1107,26 @@
a0[i] = (int)(a1[i]>>>b);
}
}
+ static void test_srlc_add(int[] a0, int[] a1) { // kernel: (a1[i] + ADD_INIT) >>> VALUE (zero-filling shift), addend is the class constant
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (int)((a1[i] + ADD_INIT)>>>VALUE); // the (int) cast is a no-op: the expression is already int
+ }
+ }
+ static void test_srlv_add(int[] a0, int[] a1, int b) { // kernel: (a1[i] + b) >>> VALUE (zero-filling shift), addend b is a runtime parameter
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (int)((a1[i] + b)>>>VALUE); // the (int) cast is a no-op: the expression is already int
+ }
+ }
+ static void test_srlc_and(int[] a0, int[] a1) { // kernel: (a1[i] & BIT_MASK) >>> VALUE (zero-filling shift), mask is the class constant
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (int)((a1[i] & BIT_MASK)>>>VALUE); // the (int) cast is a no-op: the expression is already int
+ }
+ }
+ static void test_srlv_and(int[] a0, int[] a1, int b) { // kernel: (a1[i] & b) >>> VALUE (zero-filling shift), mask b is a runtime parameter
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (int)((a1[i] & b)>>>VALUE); // the (int) cast is a no-op: the expression is already int
+ }
+ }
static void test_srac(int[] a0, int[] a1) {
for (int i = 0; i < a0.length; i+=1) {
@@ -960,6 +1153,26 @@
a0[i] = (int)(a1[i]>>b);
}
}
+ static void test_srac_add(int[] a0, int[] a1) { // kernel: (a1[i] + ADD_INIT) >> VALUE (sign-propagating shift), addend is the class constant
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (int)((a1[i] + ADD_INIT)>>VALUE); // the (int) cast is a no-op: the expression is already int
+ }
+ }
+ static void test_srav_add(int[] a0, int[] a1, int b) { // kernel: (a1[i] + b) >> VALUE (sign-propagating shift), addend b is a runtime parameter
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (int)((a1[i] + b)>>VALUE); // the (int) cast is a no-op: the expression is already int
+ }
+ }
+ static void test_srac_and(int[] a0, int[] a1) { // kernel: (a1[i] & BIT_MASK) >> VALUE (sign-propagating shift), mask is the class constant
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (int)((a1[i] & BIT_MASK)>>VALUE); // the (int) cast is a no-op: the expression is already int
+ }
+ }
+ static void test_srav_and(int[] a0, int[] a1, int b) { // kernel: (a1[i] & b) >> VALUE (sign-propagating shift), mask b is a runtime parameter
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (int)((a1[i] & b)>>VALUE); // the (int) cast is a no-op: the expression is already int
+ }
+ }
static void test_pack2(long[] p2, int[] a1) {
if (p2.length*2 > a1.length) return;
--- a/hotspot/test/compiler/6340864/TestLongVect.java Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/test/compiler/6340864/TestLongVect.java Fri Oct 26 12:06:55 2012 -0700
@@ -73,6 +73,7 @@
test_subc(a0, a1);
test_subv(a0, a1, (long)VALUE);
test_suba(a0, a1, a2);
+
test_mulc(a0, a1);
test_mulv(a0, a1, (long)VALUE);
test_mula(a0, a1, a2);
@@ -85,6 +86,7 @@
test_divc_n(a0, a1);
test_divv(a0, a1, (long)-VALUE);
test_diva(a0, a1, a3);
+
test_andc(a0, a1);
test_andv(a0, a1, (long)BIT_MASK);
test_anda(a0, a1, a4);
@@ -94,30 +96,48 @@
test_xorc(a0, a1);
test_xorv(a0, a1, (long)BIT_MASK);
test_xora(a0, a1, a4);
+
test_sllc(a0, a1);
test_sllv(a0, a1, VALUE);
test_srlc(a0, a1);
test_srlv(a0, a1, VALUE);
test_srac(a0, a1);
test_srav(a0, a1, VALUE);
+
test_sllc_n(a0, a1);
test_sllv(a0, a1, -VALUE);
test_srlc_n(a0, a1);
test_srlv(a0, a1, -VALUE);
test_srac_n(a0, a1);
test_srav(a0, a1, -VALUE);
+
test_sllc_o(a0, a1);
test_sllv(a0, a1, SHIFT);
test_srlc_o(a0, a1);
test_srlv(a0, a1, SHIFT);
test_srac_o(a0, a1);
test_srav(a0, a1, SHIFT);
+
test_sllc_on(a0, a1);
test_sllv(a0, a1, -SHIFT);
test_srlc_on(a0, a1);
test_srlv(a0, a1, -SHIFT);
test_srac_on(a0, a1);
test_srav(a0, a1, -SHIFT);
+
+ test_sllc_add(a0, a1);
+ test_sllv_add(a0, a1, ADD_INIT);
+ test_srlc_add(a0, a1);
+ test_srlv_add(a0, a1, ADD_INIT);
+ test_srac_add(a0, a1);
+ test_srav_add(a0, a1, ADD_INIT);
+
+ test_sllc_and(a0, a1);
+ test_sllv_and(a0, a1, BIT_MASK);
+ test_srlc_and(a0, a1);
+ test_srlv_and(a0, a1, BIT_MASK);
+ test_srac_and(a0, a1);
+ test_srav_and(a0, a1, BIT_MASK);
}
// Test and verify results
System.out.println("Verification");
@@ -354,6 +374,60 @@
errn += verify("test_srav_on: ", i, a0[i], (long)((long)(ADD_INIT+i)>>(-SHIFT)));
}
+ test_sllc_add(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllc_add: ", i, a0[i], (long)(((long)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+ }
+ test_sllv_add(a0, a1, ADD_INIT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllv_add: ", i, a0[i], (long)(((long)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+ }
+
+ test_srlc_add(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlc_add: ", i, a0[i], (long)(((long)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+ }
+ test_srlv_add(a0, a1, ADD_INIT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlv_add: ", i, a0[i], (long)(((long)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+ }
+
+ test_srac_add(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srac_add: ", i, a0[i], (long)(((long)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+ }
+ test_srav_add(a0, a1, ADD_INIT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srav_add: ", i, a0[i], (long)(((long)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+ }
+
+ test_sllc_and(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllc_and: ", i, a0[i], (long)(((long)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+ }
+ test_sllv_and(a0, a1, BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllv_and: ", i, a0[i], (long)(((long)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+ }
+
+ test_srlc_and(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlc_and: ", i, a0[i], (long)(((long)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+ }
+ test_srlv_and(a0, a1, BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlv_and: ", i, a0[i], (long)(((long)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+ }
+
+ test_srac_and(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srac_and: ", i, a0[i], (long)(((long)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+ }
+ test_srav_and(a0, a1, BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srav_and: ", i, a0[i], (long)(((long)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+ }
+
}
if (errn > 0)
@@ -696,6 +770,84 @@
end = System.currentTimeMillis();
System.out.println("test_srav_on: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllc_add(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllc_add: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllv_add(a0, a1, ADD_INIT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllv_add: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlc_add(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlc_add: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlv_add(a0, a1, ADD_INIT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlv_add: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srac_add(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srac_add: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srav_add(a0, a1, ADD_INIT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srav_add: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllc_and(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllc_and: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllv_and(a0, a1, BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllv_and: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlc_and(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlc_and: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlv_and(a0, a1, BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlv_and: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srac_and(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srac_and: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srav_and(a0, a1, BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srav_and: " + (end - start));
+
return errn;
}
@@ -854,6 +1006,26 @@
a0[i] = (long)(a1[i]<<b);
}
}
+ static void test_sllc_add(long[] a0, long[] a1) { // element-wise: a0[i] = (a1[i] + ADD_INIT) << VALUE
+ for (int i = 0; i < a0.length; i+=1) { // bounded by a0.length; a1 is read at the same index
+ a0[i] = (long)((a1[i] + ADD_INIT)<<VALUE); // add, then left shift; the operand is already long, so the cast is a no-op
+ }
+ }
+ static void test_sllv_add(long[] a0, long[] a1, long b) { // element-wise: a0[i] = (a1[i] + b) << VALUE; the addend is the parameter b
+ for (int i = 0; i < a0.length; i+=1) { // bounded by a0.length; a1 is read at the same index
+ a0[i] = (long)((a1[i] + b)<<VALUE); // add, then left shift; the shift count is still VALUE, not b
+ }
+ }
+ static void test_sllc_and(long[] a0, long[] a1) { // element-wise: a0[i] = (a1[i] & BIT_MASK) << VALUE
+ for (int i = 0; i < a0.length; i+=1) { // bounded by a0.length; a1 is read at the same index
+ a0[i] = (long)((a1[i] & BIT_MASK)<<VALUE); // mask, then left shift; the operand is already long, so the cast is a no-op
+ }
+ }
+ static void test_sllv_and(long[] a0, long[] a1, long b) { // element-wise: a0[i] = (a1[i] & b) << VALUE; the mask is the parameter b
+ for (int i = 0; i < a0.length; i+=1) { // bounded by a0.length; a1 is read at the same index
+ a0[i] = (long)((a1[i] & b)<<VALUE); // mask, then left shift; the shift count is still VALUE, not b
+ }
+ }
static void test_srlc(long[] a0, long[] a1) {
for (int i = 0; i < a0.length; i+=1) {
@@ -880,6 +1052,26 @@
a0[i] = (long)(a1[i]>>>b);
}
}
+ static void test_srlc_add(long[] a0, long[] a1) { // element-wise: a0[i] = (a1[i] + ADD_INIT) >>> VALUE
+ for (int i = 0; i < a0.length; i+=1) { // bounded by a0.length; a1 is read at the same index
+ a0[i] = (long)((a1[i] + ADD_INIT)>>>VALUE); // add, then zero-filling (>>>) right shift on the 64-bit sum
+ }
+ }
+ static void test_srlv_add(long[] a0, long[] a1, long b) { // element-wise: a0[i] = (a1[i] + b) >>> VALUE; the addend is the parameter b
+ for (int i = 0; i < a0.length; i+=1) { // bounded by a0.length; a1 is read at the same index
+ a0[i] = (long)((a1[i] + b)>>>VALUE); // add, then zero-filling (>>>) right shift; the shift count is VALUE, not b
+ }
+ }
+ static void test_srlc_and(long[] a0, long[] a1) { // element-wise: a0[i] = (a1[i] & BIT_MASK) >>> VALUE
+ for (int i = 0; i < a0.length; i+=1) { // bounded by a0.length; a1 is read at the same index
+ a0[i] = (long)((a1[i] & BIT_MASK)>>>VALUE); // mask, then zero-filling (>>>) right shift
+ }
+ }
+ static void test_srlv_and(long[] a0, long[] a1, long b) { // element-wise: a0[i] = (a1[i] & b) >>> VALUE; the mask is the parameter b
+ for (int i = 0; i < a0.length; i+=1) { // bounded by a0.length; a1 is read at the same index
+ a0[i] = (long)((a1[i] & b)>>>VALUE); // mask, then zero-filling (>>>) right shift; the shift count is VALUE, not b
+ }
+ }
static void test_srac(long[] a0, long[] a1) {
for (int i = 0; i < a0.length; i+=1) {
@@ -906,6 +1098,26 @@
a0[i] = (long)(a1[i]>>b);
}
}
+ static void test_srac_add(long[] a0, long[] a1) { // element-wise: a0[i] = (a1[i] + ADD_INIT) >> VALUE
+ for (int i = 0; i < a0.length; i+=1) { // bounded by a0.length; a1 is read at the same index
+ a0[i] = (long)((a1[i] + ADD_INIT)>>VALUE); // add, then sign-propagating (>>) right shift on the 64-bit sum
+ }
+ }
+ static void test_srav_add(long[] a0, long[] a1, long b) { // element-wise: a0[i] = (a1[i] + b) >> VALUE; the addend is the parameter b
+ for (int i = 0; i < a0.length; i+=1) { // bounded by a0.length; a1 is read at the same index
+ a0[i] = (long)((a1[i] + b)>>VALUE); // add, then sign-propagating (>>) right shift; the shift count is VALUE, not b
+ }
+ }
+ static void test_srac_and(long[] a0, long[] a1) { // element-wise: a0[i] = (a1[i] & BIT_MASK) >> VALUE
+ for (int i = 0; i < a0.length; i+=1) { // bounded by a0.length; a1 is read at the same index
+ a0[i] = (long)((a1[i] & BIT_MASK)>>VALUE); // mask, then sign-propagating (>>) right shift
+ }
+ }
+ static void test_srav_and(long[] a0, long[] a1, long b) { // element-wise: a0[i] = (a1[i] & b) >> VALUE; the mask is the parameter b
+ for (int i = 0; i < a0.length; i+=1) { // bounded by a0.length; a1 is read at the same index
+ a0[i] = (long)((a1[i] & b)>>VALUE); // mask, then sign-propagating (>>) right shift; the shift count is VALUE, not b
+ }
+ }
static int verify(String text, int i, long elem, long val) {
if (elem != val) {
--- a/hotspot/test/compiler/6340864/TestShortVect.java Fri Oct 26 08:38:22 2012 -0700
+++ b/hotspot/test/compiler/6340864/TestShortVect.java Fri Oct 26 12:06:55 2012 -0700
@@ -75,6 +75,7 @@
test_subc(a0, a1);
test_subv(a0, a1, (short)VALUE);
test_suba(a0, a1, a2);
+
test_mulc(a0, a1);
test_mulv(a0, a1, (short)VALUE);
test_mula(a0, a1, a2);
@@ -87,6 +88,7 @@
test_divc_n(a0, a1);
test_divv(a0, a1, (short)-VALUE);
test_diva(a0, a1, a3);
+
test_andc(a0, a1);
test_andv(a0, a1, (short)BIT_MASK);
test_anda(a0, a1, a4);
@@ -96,30 +98,49 @@
test_xorc(a0, a1);
test_xorv(a0, a1, (short)BIT_MASK);
test_xora(a0, a1, a4);
+
test_sllc(a0, a1);
test_sllv(a0, a1, VALUE);
test_srlc(a0, a1);
test_srlv(a0, a1, VALUE);
test_srac(a0, a1);
test_srav(a0, a1, VALUE);
+
test_sllc_n(a0, a1);
test_sllv(a0, a1, -VALUE);
test_srlc_n(a0, a1);
test_srlv(a0, a1, -VALUE);
test_srac_n(a0, a1);
test_srav(a0, a1, -VALUE);
+
test_sllc_o(a0, a1);
test_sllv(a0, a1, SHIFT);
test_srlc_o(a0, a1);
test_srlv(a0, a1, SHIFT);
test_srac_o(a0, a1);
test_srav(a0, a1, SHIFT);
+
test_sllc_on(a0, a1);
test_sllv(a0, a1, -SHIFT);
test_srlc_on(a0, a1);
test_srlv(a0, a1, -SHIFT);
test_srac_on(a0, a1);
test_srav(a0, a1, -SHIFT);
+
+ test_sllc_add(a0, a1);
+ test_sllv_add(a0, a1, ADD_INIT);
+ test_srlc_add(a0, a1);
+ test_srlv_add(a0, a1, ADD_INIT);
+ test_srac_add(a0, a1);
+ test_srav_add(a0, a1, ADD_INIT);
+
+ test_sllc_and(a0, a1);
+ test_sllv_and(a0, a1, BIT_MASK);
+ test_srlc_and(a0, a1);
+ test_srlv_and(a0, a1, BIT_MASK);
+ test_srac_and(a0, a1);
+ test_srav_and(a0, a1, BIT_MASK);
+
test_pack2(p2, a1);
test_unpack2(a0, p2);
test_pack2_swap(p2, a1);
@@ -364,6 +385,60 @@
errn += verify("test_srav_on: ", i, a0[i], (short)((short)(ADD_INIT+i)>>(-SHIFT)));
}
+ test_sllc_add(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllc_add: ", i, a0[i], (short)(((short)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+ }
+ test_sllv_add(a0, a1, ADD_INIT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllv_add: ", i, a0[i], (short)(((short)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+ }
+
+ test_srlc_add(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlc_add: ", i, a0[i], (short)(((short)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+ }
+ test_srlv_add(a0, a1, ADD_INIT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlv_add: ", i, a0[i], (short)(((short)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+ }
+
+ test_srac_add(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srac_add: ", i, a0[i], (short)(((short)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+ }
+ test_srav_add(a0, a1, ADD_INIT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srav_add: ", i, a0[i], (short)(((short)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+ }
+
+ test_sllc_and(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllc_and: ", i, a0[i], (short)(((short)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+ }
+ test_sllv_and(a0, a1, BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllv_and: ", i, a0[i], (short)(((short)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+ }
+
+ test_srlc_and(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlc_and: ", i, a0[i], (short)(((short)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+ }
+ test_srlv_and(a0, a1, BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlv_and: ", i, a0[i], (short)(((short)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+ }
+
+ test_srac_and(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srac_and: ", i, a0[i], (short)(((short)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+ }
+ test_srav_and(a0, a1, BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srav_and: ", i, a0[i], (short)(((short)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+ }
+
test_pack2(p2, a1);
for (int i=0; i<ARRLEN/2; i++) {
errn += verify("test_pack2: ", i, p2[i], ((int)(ADD_INIT+2*i) & 0xFFFF) | ((int)(ADD_INIT+2*i+1) << 16));
@@ -762,6 +837,84 @@
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
+ test_sllc_add(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllc_add: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllv_add(a0, a1, ADD_INIT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllv_add: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlc_add(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlc_add: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlv_add(a0, a1, ADD_INIT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlv_add: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srac_add(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srac_add: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srav_add(a0, a1, ADD_INIT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srav_add: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllc_and(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllc_and: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllv_and(a0, a1, BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllv_and: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlc_and(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlc_and: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlv_and(a0, a1, BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlv_and: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srac_and(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srac_and: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srav_and(a0, a1, BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srav_and: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
test_pack2(p2, a1);
}
end = System.currentTimeMillis();
@@ -968,6 +1121,26 @@
a0[i] = (short)(a1[i]<<b);
}
}
+ static void test_sllc_add(short[] a0, short[] a1) { // element-wise: a0[i] = (short)((a1[i] + ADD_INIT) << VALUE)
+ for (int i = 0; i < a0.length; i+=1) { // bounded by a0.length; a1 is read at the same index
+ a0[i] = (short)((a1[i] + ADD_INIT)<<VALUE); // the short operand is promoted, add and shift run at that width, then (short) keeps the low 16 bits
+ }
+ }
+ static void test_sllv_add(short[] a0, short[] a1, int b) { // element-wise: a0[i] = (short)((a1[i] + b) << VALUE); the addend is the int parameter b
+ for (int i = 0; i < a0.length; i+=1) { // bounded by a0.length; a1 is read at the same index
+ a0[i] = (short)((a1[i] + b)<<VALUE); // int add and left shift, then (short) keeps the low 16 bits
+ }
+ }
+ static void test_sllc_and(short[] a0, short[] a1) { // element-wise: a0[i] = (short)((a1[i] & BIT_MASK) << VALUE)
+ for (int i = 0; i < a0.length; i+=1) { // bounded by a0.length; a1 is read at the same index
+ a0[i] = (short)((a1[i] & BIT_MASK)<<VALUE); // the short operand is promoted before masking; (short) keeps the low 16 bits of the shifted value
+ }
+ }
+ static void test_sllv_and(short[] a0, short[] a1, int b) { // element-wise: a0[i] = (short)((a1[i] & b) << VALUE); the mask is the int parameter b
+ for (int i = 0; i < a0.length; i+=1) { // bounded by a0.length; a1 is read at the same index
+ a0[i] = (short)((a1[i] & b)<<VALUE); // int mask and left shift, then (short) keeps the low 16 bits
+ }
+ }
static void test_srlc(short[] a0, short[] a1) {
for (int i = 0; i < a0.length; i+=1) {
@@ -994,6 +1167,26 @@
a0[i] = (short)(a1[i]>>>b);
}
}
+ static void test_srlc_add(short[] a0, short[] a1) { // element-wise: a0[i] = (short)((a1[i] + ADD_INIT) >>> VALUE)
+ for (int i = 0; i < a0.length; i+=1) { // bounded by a0.length; a1 is read at the same index
+ a0[i] = (short)((a1[i] + ADD_INIT)>>>VALUE); // >>> zero-fills from the top of the promoted sum, not from bit 15; (short) then narrows
+ }
+ }
+ static void test_srlv_add(short[] a0, short[] a1, int b) { // element-wise: a0[i] = (short)((a1[i] + b) >>> VALUE); the addend is the int parameter b
+ for (int i = 0; i < a0.length; i+=1) { // bounded by a0.length; a1 is read at the same index
+ a0[i] = (short)((a1[i] + b)>>>VALUE); // >>> zero-fills from bit 31 of the int sum, not from bit 15; (short) then narrows
+ }
+ }
+ static void test_srlc_and(short[] a0, short[] a1) { // element-wise: a0[i] = (short)((a1[i] & BIT_MASK) >>> VALUE)
+ for (int i = 0; i < a0.length; i+=1) { // bounded by a0.length; a1 is read at the same index
+ a0[i] = (short)((a1[i] & BIT_MASK)>>>VALUE); // a1[i] is sign-extended on promotion before the mask is applied; >>> then zero-fills
+ }
+ }
+ static void test_srlv_and(short[] a0, short[] a1, int b) { // element-wise: a0[i] = (short)((a1[i] & b) >>> VALUE); the mask is the int parameter b
+ for (int i = 0; i < a0.length; i+=1) { // bounded by a0.length; a1 is read at the same index
+ a0[i] = (short)((a1[i] & b)>>>VALUE); // a1[i] is sign-extended to int before the mask is applied; >>> then zero-fills
+ }
+ }
static void test_srac(short[] a0, short[] a1) {
for (int i = 0; i < a0.length; i+=1) {
@@ -1020,6 +1213,26 @@
a0[i] = (short)(a1[i]>>b);
}
}
+ static void test_srac_add(short[] a0, short[] a1) { // element-wise: a0[i] = (short)((a1[i] + ADD_INIT) >> VALUE)
+ for (int i = 0; i < a0.length; i+=1) { // bounded by a0.length; a1 is read at the same index
+ a0[i] = (short)((a1[i] + ADD_INIT)>>VALUE); // >> propagates the sign of the promoted sum; (short) then keeps the low 16 bits
+ }
+ }
+ static void test_srav_add(short[] a0, short[] a1, int b) { // element-wise: a0[i] = (short)((a1[i] + b) >> VALUE); the addend is the int parameter b
+ for (int i = 0; i < a0.length; i+=1) { // bounded by a0.length; a1 is read at the same index
+ a0[i] = (short)((a1[i] + b)>>VALUE); // >> propagates the sign of the int sum; (short) then keeps the low 16 bits
+ }
+ }
+ static void test_srac_and(short[] a0, short[] a1) { // element-wise: a0[i] = (short)((a1[i] & BIT_MASK) >> VALUE)
+ for (int i = 0; i < a0.length; i+=1) { // bounded by a0.length; a1 is read at the same index
+ a0[i] = (short)((a1[i] & BIT_MASK)>>VALUE); // mask on the promoted value, sign-propagating shift, then (short) narrows
+ }
+ }
+ static void test_srav_and(short[] a0, short[] a1, int b) { // element-wise: a0[i] = (short)((a1[i] & b) >> VALUE); the mask is the int parameter b
+ for (int i = 0; i < a0.length; i+=1) { // bounded by a0.length; a1 is read at the same index
+ a0[i] = (short)((a1[i] & b)>>VALUE); // int mask, sign-propagating shift, then (short) keeps the low 16 bits
+ }
+ }
static void test_pack2(int[] p2, short[] a1) {
if (p2.length*2 > a1.length) return;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7184394/TestAESBase.java Fri Oct 26 12:06:55 2012 -0700
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @author Tom Deneau
+ */
+
+import javax.crypto.Cipher;
+import javax.crypto.KeyGenerator;
+import javax.crypto.SecretKey;
+import javax.crypto.spec.IvParameterSpec;
+import javax.crypto.spec.SecretKeySpec;
+import java.security.AlgorithmParameters;
+
+import java.util.Random;
+import java.util.Arrays;
+
+abstract public class TestAESBase { // shared configuration, setup and reporting helpers; subclasses supply run() and childShowCipher()
+ int msgSize = Integer.getInteger("msgSize", 646); // plaintext length in bytes; -DmsgSize overrides the default
+ boolean checkOutput = Boolean.getBoolean("checkOutput"); // when true, prepare() keeps reference outputs and dumps the arrays
+ boolean noReinit = Boolean.getBoolean("noReinit"); // only reported by prepare() in this class
+ int keySize = Integer.getInteger("keySize", 128); // key size in bits; prepare() divides by 8 for the byte length
+ String algorithm = System.getProperty("algorithm", "AES");
+ String mode = System.getProperty("mode", "CBC");
+ byte[] input; // plaintext, filled by prepare()
+ byte[] encode; // latest ciphertext
+ byte[] expectedEncode; // reference ciphertext, set only when checkOutput is true
+ byte[] decode; // latest decrypted text
+ byte[] expectedDecode; // reference decrypted text, set only when checkOutput is true
+ Random random = new Random(0); // fixed seed, so generated key bytes are repeatable
+ Cipher cipher; // initialised in ENCRYPT_MODE by prepare()
+ Cipher dCipher; // initialised in DECRYPT_MODE by prepare()
+ String paddingStr = "PKCS5Padding";
+ AlgorithmParameters algParams; // parameters read back from the encrypting cipher
+ SecretKey key;
+ int ivLen;
+
+ static int numThreads = 0;
+ int threadId; // not assigned anywhere in this class, so it keeps the default 0
+ static synchronized int getThreadId() { // returns the current counter value, then increments it
+ int id = numThreads;
+ numThreads++;
+ return id;
+ }
+
+ abstract public void run();
+
+ public void prepare() { // builds key, input and both ciphers, then does one encrypt/decrypt round trip; exits the JVM on any exception
+ try {
+ System.out.println("\nmsgSize=" + msgSize + ", key size=" + keySize + ", reInit=" + !noReinit + ", checkOutput=" + checkOutput);
+
+ int keyLenBytes = (keySize == 0 ? 16 : keySize/8); // keySize 0 falls back to a 16-byte key
+ byte keyBytes[] = new byte[keyLenBytes];
+ if (keySize == 128) // 128-bit runs use a fixed key; every other size is filled from the seeded Random
+ keyBytes = new byte[] {-8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7};
+ else
+ random.nextBytes(keyBytes);
+
+ key = new SecretKeySpec(keyBytes, algorithm);
+ if (threadId == 0) {
+ System.out.println("Algorithm: " + key.getAlgorithm() + "("
+ + key.getEncoded().length * 8 + "bit)");
+ }
+ input = new byte[msgSize];
+ for (int i=0; i<input.length; i++) {
+ input[i] = (byte) (i & 0xff); // repeating 0..255 byte pattern
+ }
+
+ cipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE");
+ dCipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE");
+
+ ivLen = (algorithm.equals("AES") ? 16 : algorithm.equals("DES") ? 8 : 0);
+ IvParameterSpec initVector = new IvParameterSpec(new byte[ivLen]); // all-zero IV of ivLen bytes
+
+ cipher.init(Cipher.ENCRYPT_MODE, key, initVector);
+ algParams = cipher.getParameters();
+ dCipher.init(Cipher.DECRYPT_MODE, key, algParams); // decrypt side reuses the parameters taken from the encrypt side
+ if (threadId == 0) {
+ childShowCipher();
+ }
+
+ // do one encode and decode in preparation
+ // this will also create the encode buffer and decode buffer
+ encode = cipher.doFinal(input);
+ decode = dCipher.doFinal(encode);
+ if (checkOutput) {
+ expectedEncode = (byte[]) encode.clone();
+ expectedDecode = (byte[]) decode.clone();
+ showArray(key.getEncoded() , "key: ");
+ showArray(input, "input: ");
+ showArray(encode, "encode: ");
+ showArray(decode, "decode: ");
+ }
+ }
+ catch (Exception e) {
+ e.printStackTrace();
+ System.exit(1);
+ }
+ }
+
+ void showArray(byte b[], String name) { // prints the name, the length and at most the first 32 bytes in hex
+ System.out.format("%s [%d]: ", name, b.length);
+ for (int i=0; i<Math.min(b.length, 32); i++) {
+ System.out.format("%02x ", b[i] & 0xff);
+ }
+ System.out.println();
+ }
+
+ void compareArrays(byte b[], byte exp[]) { // exits the JVM with status 1 on a length difference or at the first differing byte
+ if (b.length != exp.length) {
+ System.out.format("different lengths for actual and expected output arrays\n");
+ showArray(b, "test: ");
+ showArray(exp, "exp : ");
+ System.exit(1);
+ }
+ for (int i=0; i< exp.length; i++) {
+ if (b[i] != exp[i]) {
+ System.out.format("output error at index %d: got %02x, expected %02x\n", i, b[i] & 0xff, exp[i] & 0xff);
+ showArray(b, "test: ");
+ showArray(exp, "exp : ");
+ System.exit(1);
+ }
+ }
+ }
+
+
+ void showCipher(Cipher c, String kind) { // reports the cipher that was passed in, not the encrypting field
+ System.out.println(kind + " cipher provider: " + c.getProvider());
+ System.out.println(kind + " cipher algorithm: " + c.getAlgorithm());
+ }
+
+ abstract void childShowCipher();
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7184394/TestAESDecode.java Fri Oct 26 12:06:55 2012 -0700
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @author Tom Deneau
+ */
+
+import javax.crypto.Cipher;
+
+public class TestAESDecode extends TestAESBase { // decryption side: each run() performs one dCipher.doFinal over the encode array
+ @Override
+ public void run() {
+ try {
+ if (!noReinit) dCipher.init(Cipher.DECRYPT_MODE, key, algParams); // re-initialise before every call unless noReinit is set
+ if (checkOutput) {
+ // checked version: takes the array returned by doFinal and compares it against expectedDecode
+ decode = dCipher.doFinal(encode, 0, encode.length);
+ compareArrays(decode, expectedDecode);
+ } else {
+ // non-checked version: decrypts into a caller-supplied decode buffer; note the buffer is allocated afresh on every call, not reused
+ decode = new byte[dCipher.getOutputSize(encode.length)];
+ dCipher.doFinal(encode, 0, encode.length, decode);
+ }
+ }
+ catch (Exception e) {
+ e.printStackTrace();
+ System.exit(1); // any exception ends the JVM with exit status 1
+ }
+ }
+
+ @Override
+ void childShowCipher() {
+ showCipher(dCipher, "Decryption");
+ }
+
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7184394/TestAESEncode.java Fri Oct 26 12:06:55 2012 -0700
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @author Tom Deneau
+ */
+
+import javax.crypto.Cipher;
+
+public class TestAESEncode extends TestAESBase { // encryption side: each run() performs one cipher.doFinal over the first msgSize bytes of input
+ @Override
+ public void run() {
+ try {
+ if (!noReinit) cipher.init(Cipher.ENCRYPT_MODE, key, algParams); // re-initialise before every call unless noReinit is set
+ if (checkOutput) {
+ // checked version: takes the array returned by doFinal and compares it against expectedEncode
+ encode = cipher.doFinal(input, 0, msgSize);
+ compareArrays(encode, expectedEncode);
+ } else {
+ // non-checked version: encrypts into a caller-supplied encode buffer; note the buffer is allocated afresh on every call, not reused
+ encode = new byte[cipher.getOutputSize(msgSize)];
+ cipher.doFinal(input, 0, msgSize, encode);
+ }
+ }
+ catch (Exception e) {
+ e.printStackTrace();
+ System.exit(1); // any exception ends the JVM with exit status 1
+ }
+ }
+
+ @Override
+ void childShowCipher() {
+ showCipher(cipher, "Encryption");
+ }
+
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7184394/TestAESMain.java Fri Oct 26 12:06:55 2012 -0700
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7184394
+ * @summary add intrinsics to use AES instructions
+ *
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true TestAESMain
+ *
+ * @author Tom Deneau
+ */
+
+public class TestAESMain { // times iters calls of the encode test, then iters calls of the decode test
+ public static void main(String[] args) {
+ int iters = (args.length > 0 ? Integer.parseInt(args[0]) : 1000000); // optional first argument overrides the iteration count
+ System.out.println(iters + " iterations");
+ TestAESEncode etest = new TestAESEncode();
+ etest.prepare(); // setup runs before the clock is read, so it is not timed
+ long start = System.nanoTime();
+ for (int i=0; i<iters; i++) {
+ etest.run();
+ }
+ long end = System.nanoTime();
+ System.out.println("TestAESEncode runtime was " + (double)((end - start)/1000000.0) + " ms"); // nanoseconds / 1e6 = milliseconds
+
+ TestAESDecode dtest = new TestAESDecode();
+ dtest.prepare(); // setup runs before the clock is read, so it is not timed
+ start = System.nanoTime();
+ for (int i=0; i<iters; i++) {
+ dtest.run();
+ }
+ end = System.nanoTime();
+ System.out.println("TestAESDecode runtime was " + (double)((end - start)/1000000.0) + " ms"); // nanoseconds / 1e6 = milliseconds
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/8000805/Test8000805.java Fri Oct 26 12:06:55 2012 -0700
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @bug 8000805
+ * @summary JMM issue: short loads are non-atomic
+ *
+ * @run main/othervm -server -XX:-TieredCompilation -Xcomp -XX:+PrintCompilation -XX:CompileOnly=Test8000805.loadS2LmaskFF,Test8000805.loadS2Lmask16,Test8000805.loadS2Lmask13,Test8000805.loadUS_signExt,Test8000805.loadB2L_mask8 Test8000805
+ */
+
+public class Test8000805 { // each loadXxx below is named in the @run CompileOnly list, its _1 twin is not; main() checks that the pair agree
+ static long loadS2LmaskFF (short[] sa) { return sa[0] & 0xFF; }
+ static long loadS2LmaskFF_1 (short[] sa) { return sa[0] & 0xFF; }
+
+ static long loadS2Lmask16 (short[] sa) { return sa[0] & 0xFFFE; }
+ static long loadS2Lmask16_1 (short[] sa) { return sa[0] & 0xFFFE; }
+
+ static long loadS2Lmask13 (short[] sa) { return sa[0] & 0x0FFF; }
+ static long loadS2Lmask13_1 (short[] sa) { return sa[0] & 0x0FFF; }
+
+ static int loadUS_signExt (char[] ca) { return (ca[0] << 16) >> 16; }
+ static int loadUS_signExt_1 (char[] ca) { return (ca[0] << 16) >> 16; }
+
+ static long loadB2L_mask8 (byte[] ba) { return ba[0] & 0x55; }
+ static long loadB2L_mask8_1 (byte[] ba) { return ba[0] & 0x55; }
+
+ public static void main(String[] args) {
+ for (int i = Byte.MIN_VALUE; i <= Byte.MAX_VALUE; i++) { // inclusive bound so 127 is covered; i is an int, so i++ cannot wrap here
+ byte[] ba = new byte[] { (byte) i};
+
+ { long v1 = loadB2L_mask8(ba);
+ long v2 = loadB2L_mask8_1(ba);
+ if (v1 != v2)
+ throw new InternalError(String.format("loadB2L_mask8 failed: %x != %x", v1, v2)); }
+ }
+
+ for (int i = Short.MIN_VALUE; i <= Short.MAX_VALUE; i++) { // inclusive bound so 32767 is covered; i is an int, so i++ cannot wrap here
+ short[] sa = new short[] { (short)i };
+ char[] ca = new char[] { (char)i }; // same low 16 bits as sa[0], viewed as an unsigned char
+
+ { long v1 = loadS2LmaskFF(sa);
+ long v2 = loadS2LmaskFF_1(sa);
+ if (v1 != v2)
+ throw new InternalError(String.format("loadS2LmaskFF failed: %x != %x", v1, v2)); }
+
+ { long v1 = loadS2Lmask16(sa);
+ long v2 = loadS2Lmask16_1(sa);
+ if (v1 != v2)
+ throw new InternalError(String.format("loadS2Lmask16 failed: %x != %x", v1, v2)); }
+
+ { long v1 = loadS2Lmask13(sa);
+ long v2 = loadS2Lmask13_1(sa);
+ if (v1 != v2)
+ throw new InternalError(String.format("loadS2Lmask13 failed: %x != %x", v1, v2)); }
+
+ { int v1 = loadUS_signExt(ca);
+ int v2 = loadUS_signExt_1(ca);
+ if (v1 != v2)
+ throw new InternalError(String.format("loadUS_signExt failed: %x != %x", v1, v2)); }
+ }
+
+ System.out.println("TEST PASSED.");
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/8001183/TestCharVect.java Fri Oct 26 12:06:55 2012 -0700
@@ -0,0 +1,1332 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8001183
+ * @summary incorrect results of char vectors right shift operation
+ *
+ * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestCharVect
+ */
+
+public class TestCharVect {
+ private static final int ARRLEN = 997;
+ private static final int ITERS = 11000;
+ private static final int ADD_INIT = Character.MAX_VALUE-500;
+ private static final int BIT_MASK = 0xB731;
+ private static final int VALUE = 7;
+ private static final int SHIFT = 16;
+
+ public static void main(String args[]) {
+ // Entry point: run the suite once and report the outcome on stdout/stderr.
+ System.out.println("Testing Char vectors");
+ final int failures = test();
+ // A non-positive count means every check matched; otherwise exit with status 97.
+ if (failures <= 0) { System.out.println("PASSED"); return; }
+ System.err.println("FAILED: " + failures + " errors");
+ System.exit(97);
+ }
+
+ static int test() { // Runs warmup, verification and timing phases; returns the accumulated verify() result (timing is skipped when it is > 0).
+ char[] a0 = new char[ARRLEN];
+ char[] a1 = new char[ARRLEN];
+ short[] a2 = new short[ARRLEN];
+ short[] a3 = new short[ARRLEN];
+ short[] a4 = new short[ARRLEN];
+ int[] p2 = new int[ARRLEN/2];
+ long[] p4 = new long[ARRLEN/4];
+ // Initialize inputs: a1[i] = (char)(ADD_INIT+i); a2, a3, a4 are filled with VALUE, -VALUE and (short)BIT_MASK; gold_sum is the expected test_sum(a1).
+ int gold_sum = 0;
+ for (int i=0; i<ARRLEN; i++) {
+ char val = (char)(ADD_INIT+i);
+ gold_sum += val;
+ a1[i] = val;
+ a2[i] = VALUE;
+ a3[i] = -VALUE;
+ a4[i] = (short)BIT_MASK;
+ }
+ System.out.println("Warmup"); // every kernel is called ITERS times before any result is checked (NOTE(review): presumably to get them JIT-compiled - confirm)
+ for (int i=0; i<ITERS; i++) {
+ test_sum(a1);
+ test_addc(a0, a1);
+ test_addv(a0, a1, (char)VALUE);
+ test_adda(a0, a1, a2);
+ test_subc(a0, a1);
+ test_subv(a0, a1, (char)VALUE);
+ test_suba(a0, a1, a2);
+
+ test_mulc(a0, a1);
+ test_mulv(a0, a1, (char)VALUE);
+ test_mula(a0, a1, a2);
+ test_divc(a0, a1);
+ test_divv(a0, a1, VALUE);
+ test_diva(a0, a1, a2);
+ test_mulc_n(a0, a1);
+ test_mulv(a0, a1, (char)-VALUE);
+ test_mula(a0, a1, a3);
+ test_divc_n(a0, a1);
+ test_divv(a0, a1, -VALUE);
+ test_diva(a0, a1, a3);
+
+ test_andc(a0, a1);
+ test_andv(a0, a1, (short)BIT_MASK);
+ test_anda(a0, a1, a4);
+ test_orc(a0, a1);
+ test_orv(a0, a1, (short)BIT_MASK);
+ test_ora(a0, a1, a4);
+ test_xorc(a0, a1);
+ test_xorv(a0, a1, (short)BIT_MASK);
+ test_xora(a0, a1, a4);
+
+ test_sllc(a0, a1);
+ test_sllv(a0, a1, VALUE);
+ test_srlc(a0, a1);
+ test_srlv(a0, a1, VALUE);
+ test_srac(a0, a1);
+ test_srav(a0, a1, VALUE);
+
+ test_sllc_n(a0, a1);
+ test_sllv(a0, a1, -VALUE);
+ test_srlc_n(a0, a1);
+ test_srlv(a0, a1, -VALUE);
+ test_srac_n(a0, a1);
+ test_srav(a0, a1, -VALUE);
+
+ test_sllc_o(a0, a1);
+ test_sllv(a0, a1, SHIFT);
+ test_srlc_o(a0, a1);
+ test_srlv(a0, a1, SHIFT);
+ test_srac_o(a0, a1);
+ test_srav(a0, a1, SHIFT);
+
+ test_sllc_on(a0, a1);
+ test_sllv(a0, a1, -SHIFT);
+ test_srlc_on(a0, a1);
+ test_srlv(a0, a1, -SHIFT);
+ test_srac_on(a0, a1);
+ test_srav(a0, a1, -SHIFT);
+
+ test_sllc_add(a0, a1);
+ test_sllv_add(a0, a1, ADD_INIT);
+ test_srlc_add(a0, a1);
+ test_srlv_add(a0, a1, ADD_INIT);
+ test_srac_add(a0, a1);
+ test_srav_add(a0, a1, ADD_INIT);
+
+ test_sllc_and(a0, a1);
+ test_sllv_and(a0, a1, BIT_MASK);
+ test_srlc_and(a0, a1);
+ test_srlv_and(a0, a1, BIT_MASK);
+ test_srac_and(a0, a1);
+ test_srav_and(a0, a1, BIT_MASK);
+
+ test_pack2(p2, a1);
+ test_unpack2(a0, p2);
+ test_pack2_swap(p2, a1);
+ test_unpack2_swap(a0, p2);
+ test_pack4(p4, a1);
+ test_unpack4(a0, p4);
+ test_pack4_swap(p4, a1);
+ test_unpack4_swap(a0, p4);
+ }
+ // Test and verify results: rerun each kernel once and compare every element with the same expression evaluated directly on (char)(ADD_INIT+i).
+ System.out.println("Verification");
+ int errn = 0;
+ {
+ int sum = test_sum(a1);
+ if (sum != gold_sum) {
+ System.err.println("test_sum: " + sum + " != " + gold_sum);
+ errn++;
+ }
+
+ test_addc(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_addc: ", i, a0[i], (char)((char)(ADD_INIT+i)+VALUE));
+ }
+ test_addv(a0, a1, (char)VALUE);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_addv: ", i, a0[i], (char)((char)(ADD_INIT+i)+VALUE));
+ }
+ test_adda(a0, a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_adda: ", i, a0[i], (char)((char)(ADD_INIT+i)+VALUE));
+ }
+
+ test_subc(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_subc: ", i, a0[i], (char)((char)(ADD_INIT+i)-VALUE));
+ }
+ test_subv(a0, a1, (char)VALUE);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_subv: ", i, a0[i], (char)((char)(ADD_INIT+i)-VALUE));
+ }
+ test_suba(a0, a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_suba: ", i, a0[i], (char)((char)(ADD_INIT+i)-VALUE));
+ }
+
+ test_mulc(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_mulc: ", i, a0[i], (char)((char)(ADD_INIT+i)*VALUE));
+ }
+ test_mulv(a0, a1, (char)VALUE);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_mulv: ", i, a0[i], (char)((char)(ADD_INIT+i)*VALUE));
+ }
+ test_mula(a0, a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_mula: ", i, a0[i], (char)((char)(ADD_INIT+i)*VALUE));
+ }
+
+ test_divc(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_divc: ", i, a0[i], (char)((char)(ADD_INIT+i)/VALUE));
+ }
+ test_divv(a0, a1, VALUE);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_divv: ", i, a0[i], (char)((char)(ADD_INIT+i)/VALUE));
+ }
+ test_diva(a0, a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_diva: ", i, a0[i], (char)((char)(ADD_INIT+i)/VALUE));
+ }
+
+ test_mulc_n(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_mulc_n: ", i, a0[i], (char)((char)(ADD_INIT+i)*(-VALUE)));
+ }
+ test_mulv(a0, a1, (char)-VALUE);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_mulv_n: ", i, a0[i], (char)((char)(ADD_INIT+i)*(-VALUE)));
+ }
+ test_mula(a0, a1, a3);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_mula_n: ", i, a0[i], (char)((char)(ADD_INIT+i)*(-VALUE)));
+ }
+
+ test_divc_n(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_divc_n: ", i, a0[i], (char)((char)(ADD_INIT+i)/(-VALUE)));
+ }
+ test_divv(a0, a1, -VALUE);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_divv_n: ", i, a0[i], (char)((char)(ADD_INIT+i)/(-VALUE)));
+ }
+ test_diva(a0, a1, a3);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_diva_n: ", i, a0[i], (char)((char)(ADD_INIT+i)/(-VALUE)));
+ }
+
+ test_andc(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_andc: ", i, a0[i], (char)((char)(ADD_INIT+i)&BIT_MASK));
+ }
+ test_andv(a0, a1, (short)BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_andv: ", i, a0[i], (char)((char)(ADD_INIT+i)&BIT_MASK));
+ }
+ test_anda(a0, a1, a4);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_anda: ", i, a0[i], (char)((char)(ADD_INIT+i)&BIT_MASK));
+ }
+
+ test_orc(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_orc: ", i, a0[i], (char)((char)(ADD_INIT+i)|BIT_MASK));
+ }
+ test_orv(a0, a1, (short)BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_orv: ", i, a0[i], (char)((char)(ADD_INIT+i)|BIT_MASK));
+ }
+ test_ora(a0, a1, a4);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ora: ", i, a0[i], (char)((char)(ADD_INIT+i)|BIT_MASK));
+ }
+
+ test_xorc(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_xorc: ", i, a0[i], (char)((char)(ADD_INIT+i)^BIT_MASK));
+ }
+ test_xorv(a0, a1, (short)BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_xorv: ", i, a0[i], (char)((char)(ADD_INIT+i)^BIT_MASK));
+ }
+ test_xora(a0, a1, a4);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_xora: ", i, a0[i], (char)((char)(ADD_INIT+i)^BIT_MASK));
+ }
+
+ test_sllc(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllc: ", i, a0[i], (char)((char)(ADD_INIT+i)<<VALUE));
+ }
+ test_sllv(a0, a1, VALUE);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllv: ", i, a0[i], (char)((char)(ADD_INIT+i)<<VALUE));
+ }
+
+ test_srlc(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlc: ", i, a0[i], (char)((char)(ADD_INIT+i)>>>VALUE));
+ }
+ test_srlv(a0, a1, VALUE);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlv: ", i, a0[i], (char)((char)(ADD_INIT+i)>>>VALUE));
+ }
+
+ test_srac(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srac: ", i, a0[i], (char)((char)(ADD_INIT+i)>>VALUE));
+ }
+ test_srav(a0, a1, VALUE);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srav: ", i, a0[i], (char)((char)(ADD_INIT+i)>>VALUE));
+ }
+
+ test_sllc_n(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllc_n: ", i, a0[i], (char)((char)(ADD_INIT+i)<<(-VALUE)));
+ }
+ test_sllv(a0, a1, -VALUE);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllv_n: ", i, a0[i], (char)((char)(ADD_INIT+i)<<(-VALUE)));
+ }
+
+ test_srlc_n(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlc_n: ", i, a0[i], (char)((char)(ADD_INIT+i)>>>(-VALUE)));
+ }
+ test_srlv(a0, a1, -VALUE);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlv_n: ", i, a0[i], (char)((char)(ADD_INIT+i)>>>(-VALUE)));
+ }
+
+ test_srac_n(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srac_n: ", i, a0[i], (char)((char)(ADD_INIT+i)>>(-VALUE)));
+ }
+ test_srav(a0, a1, -VALUE);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srav_n: ", i, a0[i], (char)((char)(ADD_INIT+i)>>(-VALUE)));
+ }
+
+ test_sllc_o(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllc_o: ", i, a0[i], (char)((char)(ADD_INIT+i)<<SHIFT));
+ }
+ test_sllv(a0, a1, SHIFT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllv_o: ", i, a0[i], (char)((char)(ADD_INIT+i)<<SHIFT));
+ }
+
+ test_srlc_o(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlc_o: ", i, a0[i], (char)((char)(ADD_INIT+i)>>>SHIFT));
+ }
+ test_srlv(a0, a1, SHIFT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlv_o: ", i, a0[i], (char)((char)(ADD_INIT+i)>>>SHIFT));
+ }
+
+ test_srac_o(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srac_o: ", i, a0[i], (char)((char)(ADD_INIT+i)>>SHIFT));
+ }
+ test_srav(a0, a1, SHIFT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srav_o: ", i, a0[i], (char)((char)(ADD_INIT+i)>>SHIFT));
+ }
+
+ test_sllc_on(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllc_on: ", i, a0[i], (char)((char)(ADD_INIT+i)<<(-SHIFT)));
+ }
+ test_sllv(a0, a1, -SHIFT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllv_on: ", i, a0[i], (char)((char)(ADD_INIT+i)<<(-SHIFT)));
+ }
+
+ test_srlc_on(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlc_on: ", i, a0[i], (char)((char)(ADD_INIT+i)>>>(-SHIFT)));
+ }
+ test_srlv(a0, a1, -SHIFT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlv_on: ", i, a0[i], (char)((char)(ADD_INIT+i)>>>(-SHIFT)));
+ }
+
+ test_srac_on(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srac_on: ", i, a0[i], (char)((char)(ADD_INIT+i)>>(-SHIFT)));
+ }
+ test_srav(a0, a1, -SHIFT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srav_on: ", i, a0[i], (char)((char)(ADD_INIT+i)>>(-SHIFT)));
+ }
+
+ test_sllc_add(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllc_add: ", i, a0[i], (char)(((char)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+ }
+ test_sllv_add(a0, a1, ADD_INIT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllv_add: ", i, a0[i], (char)(((char)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+ }
+
+ test_srlc_add(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlc_add: ", i, a0[i], (char)(((char)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+ }
+ test_srlv_add(a0, a1, ADD_INIT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlv_add: ", i, a0[i], (char)(((char)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+ }
+
+ test_srac_add(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srac_add: ", i, a0[i], (char)(((char)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+ }
+ test_srav_add(a0, a1, ADD_INIT);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srav_add: ", i, a0[i], (char)(((char)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+ }
+
+ test_sllc_and(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllc_and: ", i, a0[i], (char)(((char)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+ }
+ test_sllv_and(a0, a1, BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_sllv_and: ", i, a0[i], (char)(((char)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+ }
+
+ test_srlc_and(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlc_and: ", i, a0[i], (char)(((char)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+ }
+ test_srlv_and(a0, a1, BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srlv_and: ", i, a0[i], (char)(((char)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+ }
+
+ test_srac_and(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srac_and: ", i, a0[i], (char)(((char)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+ }
+ test_srav_and(a0, a1, BIT_MASK);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_srav_and: ", i, a0[i], (char)(((char)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+ }
+
+ test_pack2(p2, a1);
+ for (int i=0; i<ARRLEN/2; i++) {
+ errn += verify("test_pack2: ", i, p2[i], ((int)(ADD_INIT+2*i) & 0xFFFF) | ((int)(ADD_INIT+2*i+1) << 16));
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a0[i] = (char)-1; // overwrite a0 so values left by earlier kernels cannot satisfy the unpack check
+ }
+ test_unpack2(a0, p2);
+ for (int i=0; i<(ARRLEN&(-2)); i++) {
+ errn += verify("test_unpack2: ", i, a0[i], (char)(ADD_INIT+i));
+ }
+
+ test_pack2_swap(p2, a1);
+ for (int i=0; i<ARRLEN/2; i++) {
+ errn += verify("test_pack2_swap: ", i, p2[i], ((int)(ADD_INIT+2*i+1) & 0xFFFF) | ((int)(ADD_INIT+2*i) << 16));
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a0[i] = (char)-1;
+ }
+ test_unpack2_swap(a0, p2);
+ for (int i=0; i<(ARRLEN&(-2)); i++) {
+ errn += verify("test_unpack2_swap: ", i, a0[i], (char)(ADD_INIT+i));
+ }
+
+ test_pack4(p4, a1);
+ for (int i=0; i<ARRLEN/4; i++) {
+ errn += verify("test_pack4: ", i, p4[i], ((long)(ADD_INIT+4*i+0) & 0xFFFFl) |
+ (((long)(ADD_INIT+4*i+1) & 0xFFFFl) << 16) |
+ (((long)(ADD_INIT+4*i+2) & 0xFFFFl) << 32) |
+ (((long)(ADD_INIT+4*i+3) & 0xFFFFl) << 48));
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a0[i] = (char)-1;
+ }
+ test_unpack4(a0, p4);
+ for (int i=0; i<(ARRLEN&(-4)); i++) {
+ errn += verify("test_unpack4: ", i, a0[i], (char)(ADD_INIT+i));
+ }
+
+ test_pack4_swap(p4, a1);
+ for (int i=0; i<ARRLEN/4; i++) {
+ errn += verify("test_pack4_swap: ", i, p4[i], ((long)(ADD_INIT+4*i+3) & 0xFFFFl) |
+ (((long)(ADD_INIT+4*i+2) & 0xFFFFl) << 16) |
+ (((long)(ADD_INIT+4*i+1) & 0xFFFFl) << 32) |
+ (((long)(ADD_INIT+4*i+0) & 0xFFFFl) << 48));
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a0[i] = (char)-1;
+ }
+ test_unpack4_swap(a0, p4);
+ for (int i=0; i<(ARRLEN&(-4)); i++) {
+ errn += verify("test_unpack4_swap: ", i, a0[i], (char)(ADD_INIT+i));
+ }
+
+ }
+
+ if (errn > 0) // verification failed: report the count and skip the timing phase
+ return errn;
+
+ System.out.println("Time"); // elapsed milliseconds for ITERS calls of each kernel; printed only, never checked
+ long start, end;
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sum(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sum: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_addc(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_addc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_addv(a0, a1, (char)VALUE);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_addv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_adda(a0, a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_adda: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_subc(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_subc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_subv(a0, a1, (char)VALUE);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_subv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_suba(a0, a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_suba: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_mulc(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_mulc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_mulv(a0, a1, (char)VALUE);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_mulv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_mula(a0, a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_mula: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_divc(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_divc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_divv(a0, a1, VALUE);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_divv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_diva(a0, a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_diva: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_mulc_n(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_mulc_n: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_mulv(a0, a1, (char)-VALUE);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_mulv_n: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_mula(a0, a1, a3);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_mula_n: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_divc_n(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_divc_n: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_divv(a0, a1, -VALUE);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_divv_n: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_diva(a0, a1, a3);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_diva_n: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_andc(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_andc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_andv(a0, a1, (short)BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_andv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_anda(a0, a1, a4);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_anda: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_orc(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_orc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_orv(a0, a1, (short)BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_orv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ora(a0, a1, a4);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ora: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_xorc(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_xorc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_xorv(a0, a1, (short)BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_xorv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_xora(a0, a1, a4);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_xora: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllc(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllv(a0, a1, VALUE);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllv: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlc(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlv(a0, a1, VALUE);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlv: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srac(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srac: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srav(a0, a1, VALUE);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srav: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllc_n(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllc_n: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllv(a0, a1, -VALUE);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllv_n: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlc_n(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlc_n: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlv(a0, a1, -VALUE);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlv_n: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srac_n(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srac_n: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srav(a0, a1, -VALUE);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srav_n: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllc_o(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllc_o: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllv(a0, a1, SHIFT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllv_o: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlc_o(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlc_o: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlv(a0, a1, SHIFT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlv_o: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srac_o(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srac_o: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srav(a0, a1, SHIFT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srav_o: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllc_on(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllc_on: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllv(a0, a1, -SHIFT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllv_on: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlc_on(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlc_on: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlv(a0, a1, -SHIFT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlv_on: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srac_on(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srac_on: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srav(a0, a1, -SHIFT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srav_on: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllc_add(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllc_add: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllv_add(a0, a1, ADD_INIT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllv_add: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlc_add(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlc_add: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlv_add(a0, a1, ADD_INIT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlv_add: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srac_add(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srac_add: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srav_add(a0, a1, ADD_INIT);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srav_add: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllc_and(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllc_and: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_sllv_and(a0, a1, BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_sllv_and: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlc_and(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlc_and: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srlv_and(a0, a1, BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srlv_and: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srac_and(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srac_and: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_srav_and(a0, a1, BIT_MASK);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_srav_and: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_pack2(p2, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_pack2: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_unpack2(a0, p2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_unpack2: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_pack2_swap(p2, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_pack2_swap: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_unpack2_swap(a0, p2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_unpack2_swap: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_pack4(p4, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_pack4: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_unpack4(a0, p4);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_unpack4: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_pack4_swap(p4, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_pack4_swap: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_unpack4_swap(a0, p4);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_unpack4_swap: " + (end - start));
+
+ return errn;
+ }
+
+ static int test_sum(char[] a1) { // Returns the int sum of all elements of a1; each char is zero-extended before being added.
+ int sum = 0;
+ for (int i = 0; i < a1.length; i+=1) {
+ sum += a1[i];
+ }
+ return sum;
+ }
+
+ static void test_addc(char[] a0, char[] a1) { // a0[i] = a1[i] + VALUE (constant operand), truncated to char; iterates over a0.length
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]+VALUE);
+ }
+ }
+ static void test_addv(char[] a0, char[] a1, char b) { // a0[i] = a1[i] + b (loop-invariant argument), truncated to char
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]+b);
+ }
+ }
+ static void test_adda(char[] a0, char[] a1, short[] a2) { // a0[i] = a1[i] + a2[i] (per-element short operand), truncated to char
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]+a2[i]);
+ }
+ }
+
+ static void test_subc(char[] a0, char[] a1) { // a0[i] = a1[i] - VALUE (constant operand), truncated to char
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]-VALUE);
+ }
+ }
+ static void test_subv(char[] a0, char[] a1, char b) { // a0[i] = a1[i] - b (loop-invariant argument), truncated to char
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]-b);
+ }
+ }
+ static void test_suba(char[] a0, char[] a1, short[] a2) { // a0[i] = a1[i] - a2[i] (per-element short operand), truncated to char
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]-a2[i]);
+ }
+ }
+
+ static void test_mulc(char[] a0, char[] a1) { // a0[i] = a1[i] * VALUE (constant operand), truncated to char
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]*VALUE);
+ }
+ }
+ static void test_mulc_n(char[] a0, char[] a1) { // a0[i] = a1[i] * (-VALUE) (negative constant operand), truncated to char
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]*(-VALUE));
+ }
+ }
+ static void test_mulv(char[] a0, char[] a1, char b) { // a0[i] = a1[i] * b (loop-invariant argument); only the low 16 bits of the int product are kept
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]*b);
+ }
+ }
+ static void test_mula(char[] a0, char[] a1, short[] a2) { // a0[i] = a1[i] * a2[i] (per-element short operand), truncated to char
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]*a2[i]);
+ }
+ }
+
+ static void test_divc(char[] a0, char[] a1) { // a0[i] = a1[i] / VALUE (int division by a constant), truncated to char
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]/VALUE);
+ }
+ }
+ static void test_divc_n(char[] a0, char[] a1) { // a0[i] = a1[i] / (-VALUE) (int division by a negative constant), truncated to char
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]/(-VALUE));
+ }
+ }
+ static void test_divv(char[] a0, char[] a1, int b) { // a0[i] = a1[i] / b (int division by a loop-invariant argument), truncated to char
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]/b);
+ }
+ }
+ static void test_diva(char[] a0, char[] a1, short[] a2) { // a0[i] = a1[i] / a2[i] (int division by a per-element short), truncated to char
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]/a2[i]);
+ }
+ }
+
+ static void test_andc(char[] a0, char[] a1) { // a0[i] = a1[i] & BIT_MASK (constant int mask), truncated to char
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]&BIT_MASK);
+ }
+ }
+ static void test_andv(char[] a0, char[] a1, short b) { // a0[i] = a1[i] & b; b is sign-extended to int first, the cast keeps the low 16 bits
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]&b);
+ }
+ }
+ static void test_anda(char[] a0, char[] a1, short[] a2) { // a0[i] = a1[i] & a2[i] (per-element short operand), truncated to char
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]&a2[i]);
+ }
+ }
+
+ static void test_orc(char[] a0, char[] a1) { // a0[i] = a1[i] | BIT_MASK (constant int mask), truncated to char
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (char)(a1[i]|BIT_MASK);
+ }
+ }
+ static void test_orv(char[] a0, char[] a1, short b) { // a0[i] = a1[i] | b, using the same scalar b for every index of a0
+ for (int i = 0; i < a0.length; i+=1) { // a1 is read at every index of a0
+ a0[i] = (char)(a1[i]|b); // b is a short and is sign-extended to int; the cast drops the upper 16 bits again
+ }
+ }
+ static void test_ora(char[] a0, char[] a1, short[] a2) { // element-wise a0[i] = a1[i] | a2[i] for every index of a0
+ for (int i = 0; i < a0.length; i+=1) { // a1 and a2 are both indexed up to a0.length
+ a0[i] = (char)(a1[i]|a2[i]); // a2[i] is sign-extended to int; the cast drops the upper 16 bits again
+ }
+ }
+
+ static void test_xorc(char[] a0, char[] a1) { // a0[i] = a1[i] ^ BIT_MASK for every index of a0 (BIT_MASK is declared outside this block)
+ for (int i = 0; i < a0.length; i+=1) { // a1 is read at every index of a0
+ a0[i] = (char)(a1[i]^BIT_MASK); // the cast keeps only the low 16 bits of the result
+ }
+ }
+ static void test_xorv(char[] a0, char[] a1, short b) { // a0[i] = a1[i] ^ b, using the same scalar b for every index of a0
+ for (int i = 0; i < a0.length; i+=1) { // a1 is read at every index of a0
+ a0[i] = (char)(a1[i]^b); // b is a short and is sign-extended to int; the cast drops the upper 16 bits again
+ }
+ }
+ static void test_xora(char[] a0, char[] a1, short[] a2) { // element-wise a0[i] = a1[i] ^ a2[i] for every index of a0
+ for (int i = 0; i < a0.length; i+=1) { // a1 and a2 are both indexed up to a0.length
+ a0[i] = (char)(a1[i]^a2[i]); // a2[i] is sign-extended to int; the cast drops the upper 16 bits again
+ }
+ }
+
+ static void test_sllc(char[] a0, char[] a1) { // a0[i] = a1[i] << VALUE for every index of a0 (VALUE is declared outside this block)
+ for (int i = 0; i < a0.length; i+=1) { // a1 is read at every index of a0
+ a0[i] = (char)(a1[i]<<VALUE); // a1[i] widens to int, so only the low 5 bits of the shift distance are used; the cast keeps the low 16 bits
+ }
+ }
+ static void test_sllc_n(char[] a0, char[] a1) { // left shift by the negated constant: a0[i] = a1[i] << (-VALUE)
+ for (int i = 0; i < a0.length; i+=1) { // a1 is read at every index of a0
+ a0[i] = (char)(a1[i]<<(-VALUE)); // the left operand is an int, so the negative distance is reduced to its low 5 bits
+ }
+ }
+ static void test_sllc_o(char[] a0, char[] a1) { // a0[i] = a1[i] << SHIFT for every index of a0 (SHIFT is declared outside this block)
+ for (int i = 0; i < a0.length; i+=1) { // a1 is read at every index of a0
+ a0[i] = (char)(a1[i]<<SHIFT); // int shift: only the low 5 bits of SHIFT count; the cast keeps the low 16 bits
+ }
+ }
+ static void test_sllc_on(char[] a0, char[] a1) { // left shift by the negated SHIFT constant: a0[i] = a1[i] << (-SHIFT)
+ for (int i = 0; i < a0.length; i+=1) { // a1 is read at every index of a0
+ a0[i] = (char)(a1[i]<<(-SHIFT)); // the left operand is an int, so the negative distance is reduced to its low 5 bits
+ }
+ }
+ static void test_sllv(char[] a0, char[] a1, int b) { // a0[i] = a1[i] << b, with the shift distance b supplied by the caller
+ for (int i = 0; i < a0.length; i+=1) { // a1 is read at every index of a0
+ a0[i] = (char)(a1[i]<<b); // int shift: only the low 5 bits of b count; the cast keeps the low 16 bits
+ }
+ }
+ static void test_sllc_add(char[] a0, char[] a1) { // add-then-shift: a0[i] = (a1[i] + ADD_INIT) << VALUE (both constants are declared outside this block)
+ for (int i = 0; i < a0.length; i+=1) { // a1 is read at every index of a0
+ a0[i] = (char)((a1[i] + ADD_INIT)<<VALUE); // the addition happens before the shift; the cast keeps the low 16 bits
+ }
+ }
+ static void test_sllv_add(char[] a0, char[] a1, int b) { // add-then-shift with a caller-supplied addend: a0[i] = (a1[i] + b) << VALUE
+ for (int i = 0; i < a0.length; i+=1) { // a1 is read at every index of a0
+ a0[i] = (char)((a1[i] + b)<<VALUE); // b is the addend here, not the shift distance; the distance is still VALUE
+ }
+ }
+ static void test_sllc_and(char[] a0, char[] a1) { // mask-then-shift: a0[i] = (a1[i] & BIT_MASK) << VALUE (both constants are declared outside this block)
+ for (int i = 0; i < a0.length; i+=1) { // a1 is read at every index of a0
+ a0[i] = (char)((a1[i] & BIT_MASK)<<VALUE); // the mask is applied before the shift; the cast keeps the low 16 bits
+ }
+ }
+ static void test_sllv_and(char[] a0, char[] a1, int b) { // mask-then-shift with a caller-supplied mask: a0[i] = (a1[i] & b) << VALUE
+ for (int i = 0; i < a0.length; i+=1) { // a1 is read at every index of a0
+ a0[i] = (char)((a1[i] & b)<<VALUE); // b is the mask here, not the shift distance; the distance is still VALUE
+ }
+ }
+
+ static void test_srlc(char[] a0, char[] a1) { // a0[i] = a1[i] >>> VALUE for every index of a0 (VALUE is declared outside this block)
+ for (int i = 0; i < a0.length; i+=1) { // a1 is read at every index of a0
+ a0[i] = (char)(a1[i]>>>VALUE); // zero-filling shift on the int-widened char; only the low 5 bits of the distance are used
+ }
+ }
+ static void test_srlc_n(char[] a0, char[] a1) { // unsigned right shift by the negated constant: a0[i] = a1[i] >>> (-VALUE)
+ for (int i = 0; i < a0.length; i+=1) { // a1 is read at every index of a0
+ a0[i] = (char)(a1[i]>>>(-VALUE)); // the left operand is an int, so the negative distance is reduced to its low 5 bits
+ }
+ }
+ static void test_srlc_o(char[] a0, char[] a1) { // a0[i] = a1[i] >>> SHIFT for every index of a0 (SHIFT is declared outside this block)
+ for (int i = 0; i < a0.length; i+=1) { // a1 is read at every index of a0
+ a0[i] = (char)(a1[i]>>>SHIFT); // int shift: only the low 5 bits of SHIFT count; the cast keeps the low 16 bits
+ }
+ }
+ static void test_srlc_on(char[] a0, char[] a1) { // unsigned right shift by the negated SHIFT constant: a0[i] = a1[i] >>> (-SHIFT)
+ for (int i = 0; i < a0.length; i+=1) { // a1 is read at every index of a0
+ a0[i] = (char)(a1[i]>>>(-SHIFT)); // the left operand is an int, so the negative distance is reduced to its low 5 bits
+ }
+ }
+ static void test_srlv(char[] a0, char[] a1, int b) { // a0[i] = a1[i] >>> b, with the shift distance b supplied by the caller
+ for (int i = 0; i < a0.length; i+=1) { // a1 is read at every index of a0
+ a0[i] = (char)(a1[i]>>>b); // int shift: only the low 5 bits of b count; the cast keeps the low 16 bits
+ }
+ }
+ static void test_srlc_add(char[] a0, char[] a1) { // add-then-shift: a0[i] = (a1[i] + ADD_INIT) >>> VALUE (both constants are declared outside this block)
+ for (int i = 0; i < a0.length; i+=1) { // a1 is read at every index of a0
+ a0[i] = (char)((a1[i] + ADD_INIT)>>>VALUE); // the shift sees the full-width sum, not a 16-bit value; the cast truncates afterwards
+ }
+ }
+ static void test_srlv_add(char[] a0, char[] a1, int b) { // add-then-shift with a caller-supplied addend: a0[i] = (a1[i] + b) >>> VALUE
+ for (int i = 0; i < a0.length; i+=1) { // a1 is read at every index of a0
+ a0[i] = (char)((a1[i] + b)>>>VALUE); // b is the addend, not the distance; >>> zero-fills even when the int sum is negative
+ }
+ }
+ static void test_srlc_and(char[] a0, char[] a1) { // mask-then-shift: a0[i] = (a1[i] & BIT_MASK) >>> VALUE (both constants are declared outside this block)
+ for (int i = 0; i < a0.length; i+=1) { // a1 is read at every index of a0
+ a0[i] = (char)((a1[i] & BIT_MASK)>>>VALUE); // the mask is applied before the shift; the cast keeps the low 16 bits
+ }
+ }
+ static void test_srlv_and(char[] a0, char[] a1, int b) { // mask-then-shift with a caller-supplied mask: a0[i] = (a1[i] & b) >>> VALUE
+ for (int i = 0; i < a0.length; i+=1) { // a1 is read at every index of a0
+ a0[i] = (char)((a1[i] & b)>>>VALUE); // b is the mask here, not the shift distance; the distance is still VALUE
+ }
+ }
+
+ static void test_srac(char[] a0, char[] a1) { // a0[i] = a1[i] >> VALUE for every index of a0 (VALUE is declared outside this block)
+ for (int i = 0; i < a0.length; i+=1) { // a1 is read at every index of a0
+ a0[i] = (char)(a1[i]>>VALUE); // a char widens to a non-negative int, so the sign bit replicated by >> is always 0 here
+ }
+ }
+ static void test_srac_n(char[] a0, char[] a1) { // signed right shift by the negated constant: a0[i] = a1[i] >> (-VALUE)
+ for (int i = 0; i < a0.length; i+=1) { // a1 is read at every index of a0
+ a0[i] = (char)(a1[i]>>(-VALUE)); // the left operand is an int, so the negative distance is reduced to its low 5 bits
+ }
+ }
+ static void test_srac_o(char[] a0, char[] a1) { // a0[i] = a1[i] >> SHIFT for every index of a0 (SHIFT is declared outside this block)
+ for (int i = 0; i < a0.length; i+=1) { // a1 is read at every index of a0
+ a0[i] = (char)(a1[i]>>SHIFT); // int shift: only the low 5 bits of SHIFT count; the cast keeps the low 16 bits
+ }
+ }
+ static void test_srac_on(char[] a0, char[] a1) { // signed right shift by the negated SHIFT constant: a0[i] = a1[i] >> (-SHIFT)
+ for (int i = 0; i < a0.length; i+=1) { // a1 is read at every index of a0
+ a0[i] = (char)(a1[i]>>(-SHIFT)); // the left operand is an int, so the negative distance is reduced to its low 5 bits
+ }
+ }
+ static void test_srav(char[] a0, char[] a1, int b) { // a0[i] = a1[i] >> b, with the shift distance b supplied by the caller
+ for (int i = 0; i < a0.length; i+=1) { // a1 is read at every index of a0
+ a0[i] = (char)(a1[i]>>b); // int shift: only the low 5 bits of b count; the cast keeps the low 16 bits
+ }
+ }
+ static void test_srac_add(char[] a0, char[] a1) { // add-then-shift: a0[i] = (a1[i] + ADD_INIT) >> VALUE (both constants are declared outside this block)
+ for (int i = 0; i < a0.length; i+=1) { // a1 is read at every index of a0
+ a0[i] = (char)((a1[i] + ADD_INIT)>>VALUE); // the shift sees the full-width sum, not a 16-bit value; the cast truncates afterwards
+ }
+ }
+ static void test_srav_add(char[] a0, char[] a1, int b) { // add-then-shift with a caller-supplied addend: a0[i] = (a1[i] + b) >> VALUE
+ for (int i = 0; i < a0.length; i+=1) { // a1 is read at every index of a0
+ a0[i] = (char)((a1[i] + b)>>VALUE); // b is the addend, not the distance; >> replicates the sign of the int sum, which can be negative
+ }
+ }
+ static void test_srac_and(char[] a0, char[] a1) { // mask-then-shift: a0[i] = (a1[i] & BIT_MASK) >> VALUE (both constants are declared outside this block)
+ for (int i = 0; i < a0.length; i+=1) { // a1 is read at every index of a0
+ a0[i] = (char)((a1[i] & BIT_MASK)>>VALUE); // the mask is applied before the shift; the cast keeps the low 16 bits
+ }
+ }
+ static void test_srav_and(char[] a0, char[] a1, int b) { // mask-then-shift with a caller-supplied mask: a0[i] = (a1[i] & b) >> VALUE
+ for (int i = 0; i < a0.length; i+=1) { // a1 is read at every index of a0
+ a0[i] = (char)((a1[i] & b)>>VALUE); // b is the mask here, not the shift distance; the distance is still VALUE
+ }
+ }
+
+ static void test_pack2(int[] p2, char[] a1) { // packs each consecutive pair of chars from a1 into one int of p2
+ if (p2.length*2 > a1.length) return; // silently does nothing when a1 cannot supply two chars per p2 element
+ for (int i = 0; i < p2.length; i+=1) { // one output int per iteration
+ int l0 = (int)a1[i*2+0]; // even-indexed char of the pair, zero-extended
+ int l1 = (int)a1[i*2+1]; // odd-indexed char of the pair, zero-extended
+ p2[i] = (l1 << 16) | (l0 & 0xFFFF); // l1 goes to bits 16..31, l0 to bits 0..15
+ }
+ }
+ static void test_unpack2(char[] a0, int[] p2) { // splits each int of p2 into two chars of a0, using the layout written by test_pack2
+ if (p2.length*2 > a0.length) return; // silently does nothing when a0 has no room for two chars per p2 element
+ for (int i = 0; i < p2.length; i+=1) { // one input int per iteration
+ int l = p2[i]; // packed value to split
+ a0[i*2+0] = (char)(l & 0xFFFF); // bits 0..15 go to the even slot
+ a0[i*2+1] = (char)(l >> 16); // bits 16..31 go to the odd slot; sign bits shifted in by >> are dropped by the cast
+ }
+ }
+ static void test_pack2_swap(int[] p2, char[] a1) { // like test_pack2, but with the two halves exchanged
+ if (p2.length*2 > a1.length) return; // silently does nothing when a1 cannot supply two chars per p2 element
+ for (int i = 0; i < p2.length; i+=1) { // one output int per iteration
+ int l0 = (int)a1[i*2+0]; // even-indexed char of the pair, zero-extended
+ int l1 = (int)a1[i*2+1]; // odd-indexed char of the pair, zero-extended
+ p2[i] = (l0 << 16) | (l1 & 0xFFFF); // l0 goes to bits 16..31, l1 to bits 0..15
+ }
+ }
+ static void test_unpack2_swap(char[] a0, int[] p2) { // splits each int of p2 into two chars of a0, using the layout written by test_pack2_swap
+ if (p2.length*2 > a0.length) return; // silently does nothing when a0 has no room for two chars per p2 element
+ for (int i = 0; i < p2.length; i+=1) { // one input int per iteration
+ int l = p2[i]; // packed value to split
+ a0[i*2+0] = (char)(l >> 16); // bits 16..31 go to the even slot; the cast drops any sign bits shifted in
+ a0[i*2+1] = (char)(l & 0xFFFF); // bits 0..15 go to the odd slot
+ }
+ }
+
+ static void test_pack4(long[] p4, char[] a1) { // packs each consecutive group of four chars from a1 into one long of p4
+ if (p4.length*4 > a1.length) return; // silently does nothing when a1 cannot supply four chars per p4 element
+ for (int i = 0; i < p4.length; i+=1) { // one output long per iteration
+ long l0 = (long)a1[i*4+0]; // first char of the group, zero-extended to long
+ long l1 = (long)a1[i*4+1]; // second char of the group
+ long l2 = (long)a1[i*4+2]; // third char of the group
+ long l3 = (long)a1[i*4+3]; // fourth char of the group
+ p4[i] = (l0 & 0xFFFFl) | // first char  -> bits 0..15
+ ((l1 & 0xFFFFl) << 16) | // second char -> bits 16..31
+ ((l2 & 0xFFFFl) << 32) | // third char  -> bits 32..47
+ ((l3 & 0xFFFFl) << 48); // fourth char -> bits 48..63
+ }
+ }
+ static void test_unpack4(char[] a0, long[] p4) { // splits each long of p4 into four chars of a0, using the layout written by test_pack4
+ if (p4.length*4 > a0.length) return; // silently does nothing when a0 has no room for four chars per p4 element
+ for (int i = 0; i < p4.length; i+=1) { // one input long per iteration
+ long l = p4[i]; // packed value to split
+ a0[i*4+0] = (char)(l & 0xFFFFl); // bits 0..15
+ a0[i*4+1] = (char)(l >> 16); // bits 16..31; the cast to char keeps only the low 16 bits of the shifted value
+ a0[i*4+2] = (char)(l >> 32); // bits 32..47
+ a0[i*4+3] = (char)(l >> 48); // bits 48..63; sign bits shifted in by >> are dropped by the cast
+ }
+ }
+ static void test_pack4_swap(long[] p4, char[] a1) { // like test_pack4, but with the four 16-bit lanes in reverse order
+ if (p4.length*4 > a1.length) return; // silently does nothing when a1 cannot supply four chars per p4 element
+ for (int i = 0; i < p4.length; i+=1) { // one output long per iteration
+ long l0 = (long)a1[i*4+0]; // first char of the group, zero-extended to long
+ long l1 = (long)a1[i*4+1]; // second char of the group
+ long l2 = (long)a1[i*4+2]; // third char of the group
+ long l3 = (long)a1[i*4+3]; // fourth char of the group
+ p4[i] = (l3 & 0xFFFFl) | // fourth char -> bits 0..15
+ ((l2 & 0xFFFFl) << 16) | // third char  -> bits 16..31
+ ((l1 & 0xFFFFl) << 32) | // second char -> bits 32..47
+ ((l0 & 0xFFFFl) << 48); // first char  -> bits 48..63
+ }
+ }
+ static void test_unpack4_swap(char[] a0, long[] p4) { // splits each long of p4 into four chars of a0, using the layout written by test_pack4_swap
+ if (p4.length*4 > a0.length) return; // silently does nothing when a0 has no room for four chars per p4 element
+ for (int i = 0; i < p4.length; i+=1) { // one input long per iteration
+ long l = p4[i]; // packed value to split
+ a0[i*4+0] = (char)(l >> 48); // bits 48..63; sign bits shifted in by >> are dropped by the cast
+ a0[i*4+1] = (char)(l >> 32); // bits 32..47
+ a0[i*4+2] = (char)(l >> 16); // bits 16..31
+ a0[i*4+3] = (char)(l & 0xFFFFl); // bits 0..15
+ }
+ }
+
+ static int verify(String text, int i, int elem, int val) { // compares an int against its expected value; returns 1 on mismatch, 0 on match
+ if (elem != val) { // mismatch: report it instead of throwing
+ System.err.println(text + "[" + i + "] = " + elem + " != " + val); // printed as text[i] = elem != val, both values in decimal
+ return 1; // 1 on mismatch
+ }
+ return 0; // 0 when elem equals val
+ }
+
+ static int verify(String text, int i, long elem, long val) { // long overload: compares elem against val; returns 1 on mismatch, 0 on match
+ if (elem != val) { // mismatch: report it instead of throwing
+ System.err.println(text + "[" + i + "] = " + Long.toHexString(elem) + " != " + Long.toHexString(val)); // both values are printed in hexadecimal
+ return 1; // 1 on mismatch
+ }
+ return 0; // 0 when elem equals val
+ }
+}