--- a/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp Tue Jan 14 14:51:47 2014 +0100
+++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp Tue Jan 14 17:46:48 2014 -0800
@@ -88,6 +88,7 @@
orncc_op3 = 0x16,
xnorcc_op3 = 0x17,
addccc_op3 = 0x18,
+ aes4_op3 = 0x19,
umulcc_op3 = 0x1a,
smulcc_op3 = 0x1b,
subccc_op3 = 0x1c,
@@ -121,6 +122,8 @@
fpop1_op3 = 0x34,
fpop2_op3 = 0x35,
impdep1_op3 = 0x36,
+ aes3_op3 = 0x36,
+ flog3_op3 = 0x36,
impdep2_op3 = 0x37,
jmpl_op3 = 0x38,
rett_op3 = 0x39,
@@ -172,41 +175,56 @@
enum opfs {
// selected opfs
- fmovs_opf = 0x01,
- fmovd_opf = 0x02,
+ fmovs_opf = 0x01,
+ fmovd_opf = 0x02,
- fnegs_opf = 0x05,
- fnegd_opf = 0x06,
+ fnegs_opf = 0x05,
+ fnegd_opf = 0x06,
- fadds_opf = 0x41,
- faddd_opf = 0x42,
- fsubs_opf = 0x45,
- fsubd_opf = 0x46,
+ fadds_opf = 0x41,
+ faddd_opf = 0x42,
+ fsubs_opf = 0x45,
+ fsubd_opf = 0x46,
- fmuls_opf = 0x49,
- fmuld_opf = 0x4a,
- fdivs_opf = 0x4d,
- fdivd_opf = 0x4e,
+ fmuls_opf = 0x49,
+ fmuld_opf = 0x4a,
+ fdivs_opf = 0x4d,
+ fdivd_opf = 0x4e,
+
+ fcmps_opf = 0x51,
+ fcmpd_opf = 0x52,
- fcmps_opf = 0x51,
- fcmpd_opf = 0x52,
+ fstox_opf = 0x81,
+ fdtox_opf = 0x82,
+ fxtos_opf = 0x84,
+ fxtod_opf = 0x88,
+ fitos_opf = 0xc4,
+ fdtos_opf = 0xc6,
+ fitod_opf = 0xc8,
+ fstod_opf = 0xc9,
+ fstoi_opf = 0xd1,
+ fdtoi_opf = 0xd2,
- fstox_opf = 0x81,
- fdtox_opf = 0x82,
- fxtos_opf = 0x84,
- fxtod_opf = 0x88,
- fitos_opf = 0xc4,
- fdtos_opf = 0xc6,
- fitod_opf = 0xc8,
- fstod_opf = 0xc9,
- fstoi_opf = 0xd1,
- fdtoi_opf = 0xd2,
+ mdtox_opf = 0x110,
+ mstouw_opf = 0x111,
+ mstosw_opf = 0x113,
+ mxtod_opf = 0x118,
+ mwtos_opf = 0x119,
+
+ aes_kexpand0_opf = 0x130,
+ aes_kexpand2_opf = 0x131
+ };
- mdtox_opf = 0x110,
- mstouw_opf = 0x111,
- mstosw_opf = 0x113,
- mxtod_opf = 0x118,
- mwtos_opf = 0x119
+ enum op5s { // op5 values, passed to op5() by the 4-operand AES emitters
+ aes_eround01_op5 = 0x00,
+ aes_eround23_op5 = 0x01,
+ aes_dround01_op5 = 0x02,
+ aes_dround23_op5 = 0x03,
+ aes_eround01_l_op5 = 0x04,
+ aes_eround23_l_op5 = 0x05,
+ aes_dround01_l_op5 = 0x06,
+ aes_dround23_l_op5 = 0x07,
+ aes_kexpand1_op5 = 0x08
};
enum RCondition { rc_z = 1, rc_lez = 2, rc_lz = 3, rc_nz = 5, rc_gz = 6, rc_gez = 7, rc_last = rc_gez };
@@ -427,6 +445,7 @@
static int immed( bool i) { return u_field(i ? 1 : 0, 13, 13); }
static int opf_low6( int w) { return u_field(w, 10, 5); }
static int opf_low5( int w) { return u_field(w, 9, 5); }
+ static int op5( int x) { return u_field(x, 8, 5); } // op5 selector of the 4-operand AES instructions, encoded with u_field(x, 8, 5)
static int trapcc( CC cc) { return u_field(cc, 12, 11); }
static int sx( int i) { return u_field(i, 12, 12); } // shift x=1 means 64-bit
static int opf( int x) { return u_field(x, 13, 5); }
@@ -451,6 +470,7 @@
static int fd( FloatRegister r, FloatRegisterImpl::Width fwa) { return u_field(r->encoding(fwa), 29, 25); };
static int fs1(FloatRegister r, FloatRegisterImpl::Width fwa) { return u_field(r->encoding(fwa), 18, 14); };
static int fs2(FloatRegister r, FloatRegisterImpl::Width fwa) { return u_field(r->encoding(fwa), 4, 0); };
+ static int fs3(FloatRegister r, FloatRegisterImpl::Width fwa) { return u_field(r->encoding(fwa), 13, 9); }; // third FP source operand, encoded with u_field(..., 13, 9)
// some float instructions use this encoding on the op3 field
static int alt_op3(int op, FloatRegisterImpl::Width w) {
@@ -559,6 +579,12 @@
return x & ((1 << 10) - 1);
}
+ // AES crypto instructions are supported only on certain processors; asserts that VM_Version::has_aes() holds
+ static void aes_only() { assert( VM_Version::has_aes(), "This instruction only works on SPARC with AES instructions support"); }
+
+ // instruction only in VIS1; asserts that VM_Version::has_vis1() holds
+ static void vis1_only() { assert( VM_Version::has_vis1(), "This instruction only works on SPARC with VIS1"); }
+
// instruction only in VIS3
static void vis3_only() { assert( VM_Version::has_vis3(), "This instruction only works on SPARC with VIS3"); }
@@ -682,6 +708,24 @@
void addccc( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(addc_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ // 4-operand AES instructions: op3 = aes4_op3, third source via fs3() (imm5a via u_field(imm5a, 13, 9) for aes_kexpand1), operation selected via op5()
+
+ void aes_eround01( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_eround01_op5) | fs2(s2, FloatRegisterImpl::D) ); }
+ void aes_eround23( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_eround23_op5) | fs2(s2, FloatRegisterImpl::D) ); }
+ void aes_dround01( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_dround01_op5) | fs2(s2, FloatRegisterImpl::D) ); }
+ void aes_dround23( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_dround23_op5) | fs2(s2, FloatRegisterImpl::D) ); }
+ void aes_eround01_l( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_eround01_l_op5) | fs2(s2, FloatRegisterImpl::D) ); }
+ void aes_eround23_l( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_eround23_l_op5) | fs2(s2, FloatRegisterImpl::D) ); }
+ void aes_dround01_l( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_dround01_l_op5) | fs2(s2, FloatRegisterImpl::D) ); }
+ void aes_dround23_l( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_dround23_l_op5) | fs2(s2, FloatRegisterImpl::D) ); }
+ void aes_kexpand1( FloatRegister s1, FloatRegister s2, int imm5a, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | u_field(imm5a, 13, 9) | op5(aes_kexpand1_op5) | fs2(s2, FloatRegisterImpl::D) ); }
+
+
+ // 3-operand AES instructions: op3 = aes3_op3, operation selected via opf()
+
+ void aes_kexpand0( FloatRegister s1, FloatRegister s2, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes3_op3) | fs1(s1, FloatRegisterImpl::D) | opf(aes_kexpand0_opf) | fs2(s2, FloatRegisterImpl::D) ); }
+ void aes_kexpand2( FloatRegister s1, FloatRegister s2, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes3_op3) | fs1(s1, FloatRegisterImpl::D) | opf(aes_kexpand2_opf) | fs2(s2, FloatRegisterImpl::D) ); }
+
// pp 136
inline void bpr(RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt = relocInfo::none);
@@ -784,6 +828,10 @@
void fmul( FloatRegisterImpl::Width sw, FloatRegisterImpl::Width dw, FloatRegister s1, FloatRegister s2, FloatRegister d ) { emit_int32( op(arith_op) | fd(d, dw) | op3(fpop1_op3) | fs1(s1, sw) | opf(0x60 + sw + dw*4) | fs2(s2, sw)); }
void fdiv( FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister d ) { emit_int32( op(arith_op) | fd(d, w) | op3(fpop1_op3) | fs1(s1, w) | opf(0x4c + w) | fs2(s2, w)); }
+ // FXORs/FXORd instructions (VIS1); opf is computed as 0x6E - w, which is only meaningful for widths S and D
+
+ void fxor( FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister d ) { vis1_only(); assert( w == FloatRegisterImpl::S || w == FloatRegisterImpl::D, "fxor is only defined for single and double width"); emit_int32( op(arith_op) | fd(d, w) | op3(flog3_op3) | fs1(s1, w) | opf(0x6E - w) | fs2(s2, w)); }
+
// pp 164
void fsqrt( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) { emit_int32( op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0x28 + w) | fs2(s, w)); }
--- a/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp Tue Jan 14 14:51:47 2014 +0100
+++ b/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp Tue Jan 14 17:46:48 2014 -0800
@@ -3304,6 +3304,775 @@
}
}
+ address generate_aescrypt_encryptBlock() { // emits the single-block AES encrypt stub and returns its entry address
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "aesencryptBlock");
+ Label L_doLast128bit, L_storeOutput;
+ address start = __ pc();
+ Register from = O0; // source byte array
+ Register to = O1; // destination byte array
+ Register key = O2; // expanded key array
+ const Register keylen = O4; // expanded key array length; compared against 44 and 52 below
+
+ // read expanded key length (array length field, addressed relative to the first T_INT element)
+ __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0);
+
+ // load the 16 input bytes as four 4-byte words into F54 and F56; F30-F31 used as temp
+ __ ldf(FloatRegisterImpl::S, from, 0, F30);
+ __ ldf(FloatRegisterImpl::S, from, 4, F31);
+ __ fmov(FloatRegisterImpl::D, F30, F54);
+ __ ldf(FloatRegisterImpl::S, from, 8, F30);
+ __ ldf(FloatRegisterImpl::S, from, 12, F31);
+ __ fmov(FloatRegisterImpl::D, F30, F56);
+
+ // load expanded key ints 0..39 into F0..F38 (this overwrites the F30-F31 temp)
+ for ( int i = 0; i <= 38; i += 2 ) {
+ __ ldf(FloatRegisterImpl::D, key, i*4, as_FloatRegister(i));
+ }
+
+ // perform cipher transformation, starting with the xor of the input with F0/F2
+ __ fxor(FloatRegisterImpl::D, F0, F54, F54);
+ __ fxor(FloatRegisterImpl::D, F2, F56, F56);
+ // rounds 1 through 8 (two rounds per iteration, ping-ponging between F54/F56 and F58/F60)
+ for ( int i = 4; i <= 28; i += 8 ) {
+ __ aes_eround01(as_FloatRegister(i), F54, F56, F58);
+ __ aes_eround23(as_FloatRegister(i+2), F54, F56, F60);
+ __ aes_eround01(as_FloatRegister(i+4), F58, F60, F54);
+ __ aes_eround23(as_FloatRegister(i+6), F58, F60, F56);
+ }
+ __ aes_eround01(F36, F54, F56, F58); //round 9
+ __ aes_eround23(F38, F54, F56, F60);
+
+ // 128-bit original key size (keylen == 44): only the last round remains
+ __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pt, L_doLast128bit);
+
+ for ( int i = 40; i <= 50; i += 2 ) {
+ __ ldf(FloatRegisterImpl::D, key, i*4, as_FloatRegister(i) );
+ }
+ __ aes_eround01(F40, F58, F60, F54); //round 10
+ __ aes_eround23(F42, F58, F60, F56);
+ __ aes_eround01(F44, F54, F56, F58); //round 11
+ __ aes_eround23(F46, F54, F56, F60);
+
+ // 192-bit original key size (keylen == 52): only the last round remains, its key is already in F48/F50
+ __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pt, L_storeOutput);
+
+ __ ldf(FloatRegisterImpl::D, key, 208, F52);
+ __ aes_eround01(F48, F58, F60, F54); //round 12
+ __ aes_eround23(F50, F58, F60, F56);
+ __ ldf(FloatRegisterImpl::D, key, 216, F46);
+ __ ldf(FloatRegisterImpl::D, key, 224, F48);
+ __ ldf(FloatRegisterImpl::D, key, 232, F50);
+ __ aes_eround01(F52, F54, F56, F58); //round 13
+ __ aes_eround23(F46, F54, F56, F60);
+ __ br(Assembler::always, false, Assembler::pt, L_storeOutput);
+ __ delayed()->nop();
+
+ __ BIND(L_doLast128bit);
+ __ ldf(FloatRegisterImpl::D, key, 160, F48);
+ __ ldf(FloatRegisterImpl::D, key, 168, F50);
+
+ __ BIND(L_storeOutput);
+ // perform last round of encryption common for all key sizes; the last round key is in F48/F50 on every path
+ __ aes_eround01_l(F48, F58, F60, F54); //last round
+ __ aes_eround23_l(F50, F58, F60, F56);
+
+ // store output into the destination array as four 4-byte words, F0-F1 used as temp
+ __ fmov(FloatRegisterImpl::D, F54, F0);
+ __ stf(FloatRegisterImpl::S, F0, to, 0);
+ __ stf(FloatRegisterImpl::S, F1, to, 4);
+ __ fmov(FloatRegisterImpl::D, F56, F0);
+ __ stf(FloatRegisterImpl::S, F0, to, 8);
+ __ retl();
+ __ delayed()->stf(FloatRegisterImpl::S, F1, to, 12);
+
+ return start;
+ }
+
+ address generate_aescrypt_decryptBlock() { // emits the single-block AES decrypt stub and returns its entry address
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "aesdecryptBlock");
+ address start = __ pc();
+ Label L_expand192bit, L_expand256bit, L_common_transform;
+ Register from = O0; // source byte array
+ Register to = O1; // destination byte array
+ Register key = O2; // expanded key array
+ Register original_key = O3; // original key array only required during decryption
+ const Register keylen = O4; // expanded key array length; compared against 60 and 52 below
+
+ // read expanded key array length (array length field, addressed relative to the first T_INT element)
+ __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0);
+
+ // load the 16 input bytes as four 4-byte words into F52 and F54; F30,F31 used as temp
+ __ ldf(FloatRegisterImpl::S, from, 0, F30);
+ __ ldf(FloatRegisterImpl::S, from, 4, F31);
+ __ fmov(FloatRegisterImpl::D, F30, F52);
+ __ ldf(FloatRegisterImpl::S, from, 8, F30);
+ __ ldf(FloatRegisterImpl::S, from, 12, F31);
+ __ fmov(FloatRegisterImpl::D, F30, F54);
+
+ // load the first 16 bytes of the original key into F0-F3
+ for ( int i = 0; i <= 3; i++ ) {
+ __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
+ }
+
+ // 256-bit original key size (keylen == 60)
+ __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_expand256bit);
+
+ // 192-bit original key size (keylen == 52)
+ __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_expand192bit);
+
+ // 128-bit original key size
+ // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
+ for ( int i = 0; i <= 36; i += 4 ) {
+ __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+2), i/4, as_FloatRegister(i+4));
+ __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+4), as_FloatRegister(i+6));
+ }
+
+ // perform 128-bit key specific inverse cipher transformation (xor with the last expanded key pair F40/F42)
+ __ fxor(FloatRegisterImpl::D, F42, F54, F54);
+ __ fxor(FloatRegisterImpl::D, F40, F52, F52);
+ __ br(Assembler::always, false, Assembler::pt, L_common_transform);
+ __ delayed()->nop();
+
+ __ BIND(L_expand192bit);
+
+ // start loading rest of the 192-bit key
+ __ ldf(FloatRegisterImpl::S, original_key, 16, F4);
+ __ ldf(FloatRegisterImpl::S, original_key, 20, F5);
+
+ // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
+ for ( int i = 0; i <= 36; i += 6 ) {
+ __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+4), i/6, as_FloatRegister(i+6));
+ __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+6), as_FloatRegister(i+8));
+ __ aes_kexpand2(as_FloatRegister(i+4), as_FloatRegister(i+8), as_FloatRegister(i+10));
+ }
+ __ aes_kexpand1(F42, F46, 7, F48);
+ __ aes_kexpand2(F44, F48, F50);
+
+ // perform 192-bit key specific inverse cipher transformation (xor with F48/F50, then the rounds keyed by F46..F40)
+ __ fxor(FloatRegisterImpl::D, F50, F54, F54);
+ __ fxor(FloatRegisterImpl::D, F48, F52, F52);
+ __ aes_dround23(F46, F52, F54, F58);
+ __ aes_dround01(F44, F52, F54, F56);
+ __ aes_dround23(F42, F56, F58, F54);
+ __ aes_dround01(F40, F56, F58, F52);
+ __ br(Assembler::always, false, Assembler::pt, L_common_transform);
+ __ delayed()->nop();
+
+ __ BIND(L_expand256bit);
+
+ // load rest of the 256-bit key
+ for ( int i = 4; i <= 7; i++ ) {
+ __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
+ }
+
+ // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
+ for ( int i = 0; i <= 40; i += 8 ) {
+ __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+6), i/8, as_FloatRegister(i+8));
+ __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+8), as_FloatRegister(i+10));
+ __ aes_kexpand0(as_FloatRegister(i+4), as_FloatRegister(i+10), as_FloatRegister(i+12));
+ __ aes_kexpand2(as_FloatRegister(i+6), as_FloatRegister(i+12), as_FloatRegister(i+14));
+ }
+ __ aes_kexpand1(F48, F54, 6, F56);
+ __ aes_kexpand2(F50, F56, F58);
+
+ for ( int i = 0; i <= 6; i += 2 ) { // copy F58,F56,F54,F52 into F0,F2,F4,F6
+ __ fmov(FloatRegisterImpl::D, as_FloatRegister(58-i), as_FloatRegister(i));
+ }
+
+ // reload input into F52-F54 (the key expansion above overwrote them); NOTE(review): 8-byte loads, unlike the 4-byte loads at entry - confirm 'from' is 8-byte aligned
+ __ ldf(FloatRegisterImpl::D, from, 0, F52);
+ __ ldf(FloatRegisterImpl::D, from, 8, F54);
+
+ // perform 256-bit key specific inverse cipher transformation
+ __ fxor(FloatRegisterImpl::D, F0, F54, F54);
+ __ fxor(FloatRegisterImpl::D, F2, F52, F52);
+ __ aes_dround23(F4, F52, F54, F58);
+ __ aes_dround01(F6, F52, F54, F56);
+ __ aes_dround23(F50, F56, F58, F54);
+ __ aes_dround01(F48, F56, F58, F52);
+ __ aes_dround23(F46, F52, F54, F58);
+ __ aes_dround01(F44, F52, F54, F56);
+ __ aes_dround23(F42, F56, F58, F54);
+ __ aes_dround01(F40, F56, F58, F52);
+
+ for ( int i = 0; i <= 7; i++ ) { // reload F0-F7 from the original key; the fmov loop above overwrote them
+ __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
+ }
+
+ // perform inverse cipher transformations common for all key sizes (keys F38 down to F0)
+ __ BIND(L_common_transform);
+ for ( int i = 38; i >= 6; i -= 8 ) {
+ __ aes_dround23(as_FloatRegister(i), F52, F54, F58);
+ __ aes_dround01(as_FloatRegister(i-2), F52, F54, F56);
+ if ( i != 6) {
+ __ aes_dround23(as_FloatRegister(i-4), F56, F58, F54);
+ __ aes_dround01(as_FloatRegister(i-6), F56, F58, F52);
+ } else {
+ __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F54);
+ __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F52);
+ }
+ }
+
+ // store output to destination array as four 4-byte words, F0-F1 used as temp
+ __ fmov(FloatRegisterImpl::D, F52, F0);
+ __ stf(FloatRegisterImpl::S, F0, to, 0);
+ __ stf(FloatRegisterImpl::S, F1, to, 4);
+ __ fmov(FloatRegisterImpl::D, F54, F0);
+ __ stf(FloatRegisterImpl::S, F0, to, 8);
+ __ retl();
+ __ delayed()->stf(FloatRegisterImpl::S, F1, to, 12);
+
+ return start;
+ }
+
+ address generate_cipherBlockChaining_encryptAESCrypt() { // emits the AES-CBC encrypt stub and returns its entry address
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
+ Label L_cbcenc128, L_cbcenc192, L_cbcenc256;
+ address start = __ pc();
+ Register from = O0; // source byte array
+ Register to = O1; // destination byte array
+ Register key = O2; // expanded key array
+ Register rvec = O3; // init vector
+ const Register len_reg = O4; // cipher length
+ const Register keylen = O5; // reg for storing expanded key array length
+
+ // save cipher len to return in the end; kept in G5 because this stub opens no register window, so L registers belong to the caller
+ __ mov(len_reg, G5);
+
+ // read expanded key length (array length field, addressed relative to the first T_INT element)
+ __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0);
+
+ // load init vector into F60/F62 and the first 16 bytes of the expanded key into G1/G2
+ __ ldf(FloatRegisterImpl::D, rvec, 0, F60);
+ __ ldf(FloatRegisterImpl::D, rvec, 8, F62);
+ __ ldx(key,0,G1);
+ __ ldx(key,8,G2);
+
+ // start loading expanded key from byte offset 16 on, into F0..F38
+ for ( int i = 0, j = 16; i <= 38; i += 2, j += 8 ) {
+ __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i));
+ }
+
+ // 128-bit original key size (keylen == 44)
+ __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pt, L_cbcenc128);
+
+ for ( int i = 40, j = 176; i <= 46; i += 2, j += 8 ) {
+ __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i));
+ }
+
+ // 192-bit original key size (keylen == 52)
+ __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pt, L_cbcenc192);
+
+ for ( int i = 48, j = 208; i <= 54; i += 2, j += 8 ) {
+ __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i));
+ }
+
+ // 256-bit original key size
+ __ br(Assembler::always, false, Assembler::pt, L_cbcenc256);
+ __ delayed()->nop();
+
+ __ align(OptoLoopAlignment);
+ __ BIND(L_cbcenc128);
+ __ ldx(from,0,G3);
+ __ ldx(from,8,G4);
+ __ xor3(G1,G3,G3);
+ __ xor3(G2,G4,G4);
+ __ movxtod(G3,F56);
+ __ movxtod(G4,F58);
+ __ fxor(FloatRegisterImpl::D, F60, F56, F60);
+ __ fxor(FloatRegisterImpl::D, F62, F58, F62);
+
+ // TEN_EROUNDS
+ for ( int i = 0; i <= 32; i += 8 ) {
+ __ aes_eround01(as_FloatRegister(i), F60, F62, F56);
+ __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58);
+ if (i != 32 ) {
+ __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60);
+ __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62);
+ } else {
+ __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60);
+ __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62);
+ }
+ }
+
+ __ stf(FloatRegisterImpl::D, F60, to, 0);
+ __ stf(FloatRegisterImpl::D, F62, to, 8);
+ __ add(from, 16, from);
+ __ add(to, 16, to);
+ __ subcc(len_reg, 16, len_reg);
+ __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc128);
+ __ delayed()->nop();
+ __ stf(FloatRegisterImpl::D, F60, rvec, 0);
+ __ stf(FloatRegisterImpl::D, F62, rvec, 8);
+ __ retl();
+ __ delayed()->mov(G5, O0);
+
+ __ align(OptoLoopAlignment);
+ __ BIND(L_cbcenc192);
+ __ ldx(from,0,G3);
+ __ ldx(from,8,G4);
+ __ xor3(G1,G3,G3);
+ __ xor3(G2,G4,G4);
+ __ movxtod(G3,F56);
+ __ movxtod(G4,F58);
+ __ fxor(FloatRegisterImpl::D, F60, F56, F60);
+ __ fxor(FloatRegisterImpl::D, F62, F58, F62);
+
+ // TWELVE_EROUNDS
+ for ( int i = 0; i <= 40; i += 8 ) {
+ __ aes_eround01(as_FloatRegister(i), F60, F62, F56);
+ __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58);
+ if (i != 40 ) {
+ __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60);
+ __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62);
+ } else {
+ __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60);
+ __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62);
+ }
+ }
+
+ __ stf(FloatRegisterImpl::D, F60, to, 0);
+ __ stf(FloatRegisterImpl::D, F62, to, 8);
+ __ add(from, 16, from);
+ __ subcc(len_reg, 16, len_reg);
+ __ add(to, 16, to);
+ __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc192);
+ __ delayed()->nop();
+ __ stf(FloatRegisterImpl::D, F60, rvec, 0);
+ __ stf(FloatRegisterImpl::D, F62, rvec, 8);
+ __ retl();
+ __ delayed()->mov(G5, O0);
+
+ __ align(OptoLoopAlignment);
+ __ BIND(L_cbcenc256);
+ __ ldx(from,0,G3);
+ __ ldx(from,8,G4);
+ __ xor3(G1,G3,G3);
+ __ xor3(G2,G4,G4);
+ __ movxtod(G3,F56);
+ __ movxtod(G4,F58);
+ __ fxor(FloatRegisterImpl::D, F60, F56, F60);
+ __ fxor(FloatRegisterImpl::D, F62, F58, F62);
+
+ // FOURTEEN_EROUNDS
+ for ( int i = 0; i <= 48; i += 8 ) {
+ __ aes_eround01(as_FloatRegister(i), F60, F62, F56);
+ __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58);
+ if (i != 48 ) {
+ __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60);
+ __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62);
+ } else {
+ __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60);
+ __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62);
+ }
+ }
+
+ __ stf(FloatRegisterImpl::D, F60, to, 0);
+ __ stf(FloatRegisterImpl::D, F62, to, 8);
+ __ add(from, 16, from);
+ __ subcc(len_reg, 16, len_reg);
+ __ add(to, 16, to);
+ __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc256);
+ __ delayed()->nop();
+ __ stf(FloatRegisterImpl::D, F60, rvec, 0);
+ __ stf(FloatRegisterImpl::D, F62, rvec, 8);
+ __ retl();
+ __ delayed()->mov(G5, O0);
+
+ return start;
+ }
+
+ address generate_cipherBlockChaining_decryptAESCrypt_Parallel() { // emits the AES-CBC decrypt stub (two blocks per loop iteration) and returns its entry address
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
+ Label L_cbcdec_end, L_expand192bit, L_expand256bit, L_dec_first_block_start;
+ Label L_dec_first_block128, L_dec_first_block192, L_dec_next2_blocks128, L_dec_next2_blocks192, L_dec_next2_blocks256;
+ address start = __ pc();
+ Register from = I0; // source byte array
+ Register to = I1; // destination byte array
+ Register key = I2; // expanded key array
+ Register rvec = I3; // init vector
+ const Register len_reg = I4; // cipher length
+ const Register original_key = I5; // original key array only required during decryption
+ const Register keylen = L6; // reg for storing expanded key array length
+
+ // open the register window first, then keep the cipher len in this frame's L7 to return in the end (the caller's L registers are left untouched)
+ __ save_frame(0); //args are read from I* registers since we save the frame in the beginning
+ __ mov(len_reg, L7);
+
+ // load the first 16 bytes of the original key into F0-F3
+ for ( int i = 0; i <= 3; i++ ) {
+ __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
+ }
+
+ // load initial vector into L0/L1
+ __ ldx(rvec,0,L0);
+ __ ldx(rvec,8,L1);
+
+ // read expanded key array length (array length field, addressed relative to the first T_INT element)
+ __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0);
+
+ // 256-bit original key size (keylen == 60)
+ __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_expand256bit);
+
+ // 192-bit original key size (keylen == 52)
+ __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_expand192bit);
+
+ // 128-bit original key size
+ // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
+ for ( int i = 0; i <= 36; i += 4 ) {
+ __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+2), i/4, as_FloatRegister(i+4));
+ __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+4), as_FloatRegister(i+6));
+ }
+
+ // keep expanded key[last-1] and key[last] elements in L2/L3
+ __ movdtox(F40,L2);
+ __ movdtox(F42,L3);
+
+ __ and3(len_reg, 16, L4); // L4 != 0 when bit 4 of len is set: decrypt one block alone before the two-block loop
+ __ br_null(L4, false, Assembler::pt, L_dec_next2_blocks128);
+ __ delayed()->nop();
+
+ __ br(Assembler::always, false, Assembler::pt, L_dec_first_block_start);
+ __ delayed()->nop();
+
+ __ BIND(L_expand192bit);
+ // load rest of the 192-bit key
+ __ ldf(FloatRegisterImpl::S, original_key, 16, F4);
+ __ ldf(FloatRegisterImpl::S, original_key, 20, F5);
+
+ // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
+ for ( int i = 0; i <= 36; i += 6 ) {
+ __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+4), i/6, as_FloatRegister(i+6));
+ __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+6), as_FloatRegister(i+8));
+ __ aes_kexpand2(as_FloatRegister(i+4), as_FloatRegister(i+8), as_FloatRegister(i+10));
+ }
+ __ aes_kexpand1(F42, F46, 7, F48);
+ __ aes_kexpand2(F44, F48, F50);
+
+ // keep expanded key[last-1] and key[last] elements in L2/L3
+ __ movdtox(F48,L2);
+ __ movdtox(F50,L3);
+
+ __ and3(len_reg, 16, L4);
+ __ br_null(L4, false, Assembler::pt, L_dec_next2_blocks192);
+ __ delayed()->nop();
+
+ __ br(Assembler::always, false, Assembler::pt, L_dec_first_block_start);
+ __ delayed()->nop();
+
+ __ BIND(L_expand256bit);
+ // load rest of the 256-bit key
+ for ( int i = 4; i <= 7; i++ ) {
+ __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
+ }
+
+ // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
+ for ( int i = 0; i <= 40; i += 8 ) {
+ __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+6), i/8, as_FloatRegister(i+8));
+ __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+8), as_FloatRegister(i+10));
+ __ aes_kexpand0(as_FloatRegister(i+4), as_FloatRegister(i+10), as_FloatRegister(i+12));
+ __ aes_kexpand2(as_FloatRegister(i+6), as_FloatRegister(i+12), as_FloatRegister(i+14));
+ }
+ __ aes_kexpand1(F48, F54, 6, F56);
+ __ aes_kexpand2(F50, F56, F58);
+
+ // keep expanded key[last-1] and key[last] elements in L2/L3
+ __ movdtox(F56,L2);
+ __ movdtox(F58,L3);
+
+ __ and3(len_reg, 16, L4);
+ __ br_null(L4, false, Assembler::pt, L_dec_next2_blocks256);
+ __ delayed()->nop();
+
+ __ BIND(L_dec_first_block_start);
+ __ ldx(from,0,L4);
+ __ ldx(from,8,L5);
+ __ xor3(L2,L4,G1);
+ __ movxtod(G1,F60);
+ __ xor3(L3,L5,G1);
+ __ movxtod(G1,F62);
+
+ // 128-bit original key size (keylen == 44)
+ __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pn, L_dec_first_block128);
+
+ // 192-bit original key size (keylen == 52)
+ __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_dec_first_block192);
+
+ __ aes_dround23(F54, F60, F62, F58);
+ __ aes_dround01(F52, F60, F62, F56);
+ __ aes_dround23(F50, F56, F58, F62);
+ __ aes_dround01(F48, F56, F58, F60);
+
+ __ BIND(L_dec_first_block192);
+ __ aes_dround23(F46, F60, F62, F58);
+ __ aes_dround01(F44, F60, F62, F56);
+ __ aes_dround23(F42, F56, F58, F62);
+ __ aes_dround01(F40, F56, F58, F60);
+
+ __ BIND(L_dec_first_block128);
+ for ( int i = 38; i >= 6; i -= 8 ) {
+ __ aes_dround23(as_FloatRegister(i), F60, F62, F58);
+ __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56);
+ if ( i != 6) {
+ __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62);
+ __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60);
+ } else {
+ __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62);
+ __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60);
+ }
+ }
+
+ __ movxtod(L0,F56);
+ __ movxtod(L1,F58);
+ __ mov(L4,L0);
+ __ mov(L5,L1);
+ __ fxor(FloatRegisterImpl::D, F56, F60, F60);
+ __ fxor(FloatRegisterImpl::D, F58, F62, F62);
+
+ __ stf(FloatRegisterImpl::D, F60, to, 0);
+ __ stf(FloatRegisterImpl::D, F62, to, 8);
+
+ __ add(from, 16, from);
+ __ add(to, 16, to);
+ __ subcc(len_reg, 16, len_reg);
+ __ br(Assembler::equal, false, Assembler::pt, L_cbcdec_end);
+ __ delayed()->nop();
+
+ // 256-bit original key size (keylen == 60)
+ __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_dec_next2_blocks256);
+
+ // 192-bit original key size (keylen == 52)
+ __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_dec_next2_blocks192);
+
+ __ align(OptoLoopAlignment);
+ __ BIND(L_dec_next2_blocks128);
+ __ nop();
+
+ // F40:F42 used for first 16-bytes
+ __ ldx(from,0,G4);
+ __ ldx(from,8,G5);
+ __ xor3(L2,G4,G1);
+ __ movxtod(G1,F40);
+ __ xor3(L3,G5,G1);
+ __ movxtod(G1,F42);
+
+ // F60:F62 used for next 16-bytes
+ __ ldx(from,16,L4);
+ __ ldx(from,24,L5);
+ __ xor3(L2,L4,G1);
+ __ movxtod(G1,F60);
+ __ xor3(L3,L5,G1);
+ __ movxtod(G1,F62);
+
+ for ( int i = 38; i >= 6; i -= 8 ) {
+ __ aes_dround23(as_FloatRegister(i), F40, F42, F44);
+ __ aes_dround01(as_FloatRegister(i-2), F40, F42, F46);
+ __ aes_dround23(as_FloatRegister(i), F60, F62, F58);
+ __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56);
+ if (i != 6 ) {
+ __ aes_dround23(as_FloatRegister(i-4), F46, F44, F42);
+ __ aes_dround01(as_FloatRegister(i-6), F46, F44, F40);
+ __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62);
+ __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60);
+ } else {
+ __ aes_dround23_l(as_FloatRegister(i-4), F46, F44, F42);
+ __ aes_dround01_l(as_FloatRegister(i-6), F46, F44, F40);
+ __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62);
+ __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60);
+ }
+ }
+
+ __ movxtod(L0,F46);
+ __ movxtod(L1,F44);
+ __ fxor(FloatRegisterImpl::D, F46, F40, F40);
+ __ fxor(FloatRegisterImpl::D, F44, F42, F42);
+
+ __ stf(FloatRegisterImpl::D, F40, to, 0);
+ __ stf(FloatRegisterImpl::D, F42, to, 8);
+
+ __ movxtod(G4,F56);
+ __ movxtod(G5,F58);
+ __ mov(L4,L0);
+ __ mov(L5,L1);
+ __ fxor(FloatRegisterImpl::D, F56, F60, F60);
+ __ fxor(FloatRegisterImpl::D, F58, F62, F62);
+
+ __ stf(FloatRegisterImpl::D, F60, to, 16);
+ __ stf(FloatRegisterImpl::D, F62, to, 24);
+
+ __ add(from, 32, from);
+ __ add(to, 32, to);
+ __ subcc(len_reg, 32, len_reg);
+ __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks128);
+ __ delayed()->nop();
+ __ br(Assembler::always, false, Assembler::pt, L_cbcdec_end);
+ __ delayed()->nop();
+
+ __ align(OptoLoopAlignment);
+ __ BIND(L_dec_next2_blocks192);
+ __ nop();
+
+ // F48:F50 used for first 16-bytes
+ __ ldx(from,0,G4);
+ __ ldx(from,8,G5);
+ __ xor3(L2,G4,G1);
+ __ movxtod(G1,F48);
+ __ xor3(L3,G5,G1);
+ __ movxtod(G1,F50);
+
+ // F60:F62 used for next 16-bytes
+ __ ldx(from,16,L4);
+ __ ldx(from,24,L5);
+ __ xor3(L2,L4,G1);
+ __ movxtod(G1,F60);
+ __ xor3(L3,L5,G1);
+ __ movxtod(G1,F62);
+
+ for ( int i = 46; i >= 6; i -= 8 ) {
+ __ aes_dround23(as_FloatRegister(i), F48, F50, F52);
+ __ aes_dround01(as_FloatRegister(i-2), F48, F50, F54);
+ __ aes_dround23(as_FloatRegister(i), F60, F62, F58);
+ __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56);
+ if (i != 6 ) {
+ __ aes_dround23(as_FloatRegister(i-4), F54, F52, F50);
+ __ aes_dround01(as_FloatRegister(i-6), F54, F52, F48);
+ __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62);
+ __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60);
+ } else {
+ __ aes_dround23_l(as_FloatRegister(i-4), F54, F52, F50);
+ __ aes_dround01_l(as_FloatRegister(i-6), F54, F52, F48);
+ __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62);
+ __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60);
+ }
+ }
+
+ __ movxtod(L0,F54);
+ __ movxtod(L1,F52);
+ __ fxor(FloatRegisterImpl::D, F54, F48, F48);
+ __ fxor(FloatRegisterImpl::D, F52, F50, F50);
+
+ __ stf(FloatRegisterImpl::D, F48, to, 0);
+ __ stf(FloatRegisterImpl::D, F50, to, 8);
+
+ __ movxtod(G4,F56);
+ __ movxtod(G5,F58);
+ __ mov(L4,L0);
+ __ mov(L5,L1);
+ __ fxor(FloatRegisterImpl::D, F56, F60, F60);
+ __ fxor(FloatRegisterImpl::D, F58, F62, F62);
+
+ __ stf(FloatRegisterImpl::D, F60, to, 16);
+ __ stf(FloatRegisterImpl::D, F62, to, 24);
+
+ __ add(from, 32, from);
+ __ add(to, 32, to);
+ __ subcc(len_reg, 32, len_reg);
+ __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks192);
+ __ delayed()->nop();
+ __ br(Assembler::always, false, Assembler::pt, L_cbcdec_end);
+ __ delayed()->nop();
+
+ __ align(OptoLoopAlignment);
+ __ BIND(L_dec_next2_blocks256);
+ __ nop();
+
+ // F0:F2 used for first 16-bytes
+ __ ldx(from,0,G4);
+ __ ldx(from,8,G5);
+ __ xor3(L2,G4,G1);
+ __ movxtod(G1,F0);
+ __ xor3(L3,G5,G1);
+ __ movxtod(G1,F2);
+
+ // F60:F62 used for next 16-bytes
+ __ ldx(from,16,L4);
+ __ ldx(from,24,L5);
+ __ xor3(L2,L4,G1);
+ __ movxtod(G1,F60);
+ __ xor3(L3,L5,G1);
+ __ movxtod(G1,F62);
+
+ __ aes_dround23(F54, F0, F2, F4);
+ __ aes_dround01(F52, F0, F2, F6);
+ __ aes_dround23(F54, F60, F62, F58);
+ __ aes_dround01(F52, F60, F62, F56);
+ __ aes_dround23(F50, F6, F4, F2);
+ __ aes_dround01(F48, F6, F4, F0);
+ __ aes_dround23(F50, F56, F58, F62);
+ __ aes_dround01(F48, F56, F58, F60);
+ // save F48:F54 in temp registers; NOTE(review): G2 and G6 are overwritten here and never restored - confirm neither is reserved by the VM at this point
+ __ movdtox(F54,G2);
+ __ movdtox(F52,G3);
+ __ movdtox(F50,G6);
+ __ movdtox(F48,G1);
+ for ( int i = 46; i >= 14; i -= 8 ) {
+ __ aes_dround23(as_FloatRegister(i), F0, F2, F4);
+ __ aes_dround01(as_FloatRegister(i-2), F0, F2, F6);
+ __ aes_dround23(as_FloatRegister(i), F60, F62, F58);
+ __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56);
+ __ aes_dround23(as_FloatRegister(i-4), F6, F4, F2);
+ __ aes_dround01(as_FloatRegister(i-6), F6, F4, F0);
+ __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62);
+ __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60);
+ }
+ // load the original key (the values F0:F6 held before being reused as data above) into F48:F54
+ __ ldf(FloatRegisterImpl::D, original_key, 0, F48);
+ __ ldf(FloatRegisterImpl::D, original_key, 8, F50);
+ __ ldf(FloatRegisterImpl::D, original_key, 16, F52);
+ __ ldf(FloatRegisterImpl::D, original_key, 24, F54);
+ __ aes_dround23(F54, F0, F2, F4);
+ __ aes_dround01(F52, F0, F2, F6);
+ __ aes_dround23(F54, F60, F62, F58);
+ __ aes_dround01(F52, F60, F62, F56);
+ __ aes_dround23_l(F50, F6, F4, F2);
+ __ aes_dround01_l(F48, F6, F4, F0);
+ __ aes_dround23_l(F50, F56, F58, F62);
+ __ aes_dround01_l(F48, F56, F58, F60);
+ // re-init F48:F54 with their original values
+ __ movxtod(G2,F54);
+ __ movxtod(G3,F52);
+ __ movxtod(G6,F50);
+ __ movxtod(G1,F48);
+
+ __ movxtod(L0,F6);
+ __ movxtod(L1,F4);
+ __ fxor(FloatRegisterImpl::D, F6, F0, F0);
+ __ fxor(FloatRegisterImpl::D, F4, F2, F2);
+
+ __ stf(FloatRegisterImpl::D, F0, to, 0);
+ __ stf(FloatRegisterImpl::D, F2, to, 8);
+
+ __ movxtod(G4,F56);
+ __ movxtod(G5,F58);
+ __ mov(L4,L0);
+ __ mov(L5,L1);
+ __ fxor(FloatRegisterImpl::D, F56, F60, F60);
+ __ fxor(FloatRegisterImpl::D, F58, F62, F62);
+
+ __ stf(FloatRegisterImpl::D, F60, to, 16);
+ __ stf(FloatRegisterImpl::D, F62, to, 24);
+
+ __ add(from, 32, from);
+ __ add(to, 32, to);
+ __ subcc(len_reg, 32, len_reg);
+ __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks256);
+ __ delayed()->nop();
+
+ __ BIND(L_cbcdec_end);
+ __ stx(L0, rvec, 0);
+ __ stx(L1, rvec, 8);
+ __ mov(L7, I0);
+ __ restore();
+ __ retl();
+ __ delayed()->nop();
+
+ return start;
+ }
+
void generate_initial() {
// Generates all stubs and initializes the entry points
@@ -3368,6 +4137,14 @@
generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
&StubRoutines::_safefetchN_fault_pc,
&StubRoutines::_safefetchN_continuation_pc);
+
+ // generate AES intrinsics code
+ if (UseAESIntrinsics) {
+ StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
+ StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
+ StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
+ StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
+ }
}