jdk-sandbox: changeset 22509:29988315e412

--- a/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp	Fri Jan 17 09:40:04 2014 +0100
+++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp	Fri Jan 17 10:43:43 2014 -0800
@@ -88,6 +88,7 @@
     orncc_op3    = 0x16,
     xnorcc_op3   = 0x17,
     addccc_op3   = 0x18,
+    aes4_op3     = 0x19,
     umulcc_op3   = 0x1a,
     smulcc_op3   = 0x1b,
     subccc_op3   = 0x1c,
@@ -121,6 +122,8 @@
     fpop1_op3    = 0x34,
     fpop2_op3    = 0x35,
     impdep1_op3  = 0x36,
+    aes3_op3     = 0x36,
+    flog3_op3    = 0x36,
     impdep2_op3  = 0x37,
     jmpl_op3     = 0x38,
     rett_op3     = 0x39,
@@ -172,41 +175,56 @@
 
   enum opfs {
     // selected opfs
-    fmovs_opf   = 0x01,
-    fmovd_opf   = 0x02,
+    fmovs_opf          = 0x01,
+    fmovd_opf          = 0x02,
 
-    fnegs_opf   = 0x05,
-    fnegd_opf   = 0x06,
+    fnegs_opf          = 0x05,
+    fnegd_opf          = 0x06,
 
-    fadds_opf   = 0x41,
-    faddd_opf   = 0x42,
-    fsubs_opf   = 0x45,
-    fsubd_opf   = 0x46,
+    fadds_opf          = 0x41,
+    faddd_opf          = 0x42,
+    fsubs_opf          = 0x45,
+    fsubd_opf          = 0x46,
 
-    fmuls_opf   = 0x49,
-    fmuld_opf   = 0x4a,
-    fdivs_opf   = 0x4d,
-    fdivd_opf   = 0x4e,
+    fmuls_opf          = 0x49,
+    fmuld_opf          = 0x4a,
+    fdivs_opf          = 0x4d,
+    fdivd_opf          = 0x4e,
+
+    fcmps_opf          = 0x51,
+    fcmpd_opf          = 0x52,
 
-    fcmps_opf   = 0x51,
-    fcmpd_opf   = 0x52,
+    fstox_opf          = 0x81,
+    fdtox_opf          = 0x82,
+    fxtos_opf          = 0x84,
+    fxtod_opf          = 0x88,
+    fitos_opf          = 0xc4,
+    fdtos_opf          = 0xc6,
+    fitod_opf          = 0xc8,
+    fstod_opf          = 0xc9,
+    fstoi_opf          = 0xd1,
+    fdtoi_opf          = 0xd2,
 
-    fstox_opf   = 0x81,
-    fdtox_opf   = 0x82,
-    fxtos_opf   = 0x84,
-    fxtod_opf   = 0x88,
-    fitos_opf   = 0xc4,
-    fdtos_opf   = 0xc6,
-    fitod_opf   = 0xc8,
-    fstod_opf   = 0xc9,
-    fstoi_opf   = 0xd1,
-    fdtoi_opf   = 0xd2,
+    mdtox_opf          = 0x110,
+    mstouw_opf         = 0x111,
+    mstosw_opf         = 0x113,
+    mxtod_opf          = 0x118,
+    mwtos_opf          = 0x119,
+
+    aes_kexpand0_opf   = 0x130,
+    aes_kexpand2_opf   = 0x131
+  };
 
-    mdtox_opf   = 0x110,
-    mstouw_opf  = 0x111,
-    mstosw_opf  = 0x113,
-    mxtod_opf   = 0x118,
-    mwtos_opf   = 0x119
+  enum op5s {
+    aes_eround01_op5     = 0x00,
+    aes_eround23_op5     = 0x01,
+    aes_dround01_op5     = 0x02,
+    aes_dround23_op5     = 0x03,
+    aes_eround01_l_op5   = 0x04,
+    aes_eround23_l_op5   = 0x05,
+    aes_dround01_l_op5   = 0x06,
+    aes_dround23_l_op5   = 0x07,
+    aes_kexpand1_op5     = 0x08
   };
 
   enum RCondition {  rc_z = 1,  rc_lez = 2,  rc_lz = 3, rc_nz = 5, rc_gz = 6, rc_gez = 7, rc_last = rc_gez  };
@@ -427,6 +445,7 @@
   static int immed(    bool        i)  { return  u_field(i ? 1 : 0,     13, 13); }
   static int opf_low6( int         w)  { return  u_field(w,             10,  5); }
   static int opf_low5( int         w)  { return  u_field(w,              9,  5); }
+  static int op5(      int         x)  { return  u_field(x,              8,  5); }
   static int trapcc(   CC         cc)  { return  u_field(cc,            12, 11); }
   static int sx(       int         i)  { return  u_field(i,             12, 12); } // shift x=1 means 64-bit
   static int opf(      int         x)  { return  u_field(x,             13,  5); }
@@ -451,6 +470,7 @@
   static int fd( FloatRegister r,  FloatRegisterImpl::Width fwa) { return u_field(r->encoding(fwa), 29, 25); };
   static int fs1(FloatRegister r,  FloatRegisterImpl::Width fwa) { return u_field(r->encoding(fwa), 18, 14); };
   static int fs2(FloatRegister r,  FloatRegisterImpl::Width fwa) { return u_field(r->encoding(fwa),  4,  0); };
+  static int fs3(FloatRegister r,  FloatRegisterImpl::Width fwa) { return u_field(r->encoding(fwa), 13,  9); };
 
   // some float instructions use this encoding on the op3 field
   static int alt_op3(int op, FloatRegisterImpl::Width w) {
@@ -559,6 +579,12 @@
     return x & ((1 << 10) - 1);
   }
 
+  // AES crypto instructions supported only on certain processors
+  static void aes_only() { assert( VM_Version::has_aes(), "This instruction only works on SPARC with AES instructions support"); }
+
+  // instruction only in VIS1
+  static void vis1_only() { assert( VM_Version::has_vis1(), "This instruction only works on SPARC with VIS1"); }
+
   // instruction only in VIS3
   static void vis3_only() { assert( VM_Version::has_vis3(), "This instruction only works on SPARC with VIS3"); }
 
@@ -682,6 +708,24 @@
   void addccc( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(addc_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
 
 
+  // 4-operand AES instructions
+
+  void aes_eround01(  FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_eround01_op5) | fs2(s2, FloatRegisterImpl::D) ); }
+  void aes_eround23(  FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_eround23_op5) | fs2(s2, FloatRegisterImpl::D) ); }
+  void aes_dround01(  FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_dround01_op5) | fs2(s2, FloatRegisterImpl::D) ); }
+  void aes_dround23(  FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_dround23_op5) | fs2(s2, FloatRegisterImpl::D) ); }
+  void aes_eround01_l(  FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_eround01_l_op5) | fs2(s2, FloatRegisterImpl::D) ); }
+  void aes_eround23_l(  FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_eround23_l_op5) | fs2(s2, FloatRegisterImpl::D) ); }
+  void aes_dround01_l(  FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_dround01_l_op5) | fs2(s2, FloatRegisterImpl::D) ); }
+  void aes_dround23_l(  FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_dround23_l_op5) | fs2(s2, FloatRegisterImpl::D) ); }
+  void aes_kexpand1(  FloatRegister s1, FloatRegister s2, int imm5a, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | u_field(imm5a, 13, 9) | op5(aes_kexpand1_op5) | fs2(s2, FloatRegisterImpl::D) ); }
+
+
+  // 3-operand AES instructions
+
+  void aes_kexpand0(  FloatRegister s1, FloatRegister s2, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes3_op3) | fs1(s1, FloatRegisterImpl::D) | opf(aes_kexpand0_opf) | fs2(s2, FloatRegisterImpl::D) ); }
+  void aes_kexpand2(  FloatRegister s1, FloatRegister s2, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes3_op3) | fs1(s1, FloatRegisterImpl::D) | opf(aes_kexpand2_opf) | fs2(s2, FloatRegisterImpl::D) ); }
+
   // pp 136
 
   inline void bpr(RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt = relocInfo::none);
@@ -784,6 +828,10 @@
   void fmul( FloatRegisterImpl::Width sw, FloatRegisterImpl::Width dw,  FloatRegister s1, FloatRegister s2, FloatRegister d ) { emit_int32( op(arith_op) | fd(d, dw) | op3(fpop1_op3) | fs1(s1, sw) | opf(0x60 + sw + dw*4) | fs2(s2, sw)); }
   void fdiv( FloatRegisterImpl::Width w,                            FloatRegister s1, FloatRegister s2, FloatRegister d ) { emit_int32( op(arith_op) | fd(d, w)  | op3(fpop1_op3) | fs1(s1, w)  | opf(0x4c + w)         | fs2(s2, w)); }
 
+  // FXORs/FXORd instructions
+
+  void fxor( FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister d ) { vis1_only(); emit_int32( op(arith_op) | fd(d, w) | op3(flog3_op3) | fs1(s1, w) | opf(0x6E - w) | fs2(s2, w)); }
+
   // pp 164
 
   void fsqrt( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) { emit_int32( op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0x28 + w) | fs2(s, w)); }

--- a/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Fri Jan 17 09:40:04 2014 +0100
+++ b/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Fri Jan 17 10:43:43 2014 -0800
@@ -1315,7 +1315,7 @@
 }
 
 Address LIR_Assembler::as_Address(LIR_Address* addr) {
-  Register reg = addr->base()->as_register();
+  Register reg = addr->base()->as_pointer_register();
   LIR_Opr index = addr->index();
   if (index->is_illegal()) {
     return Address(reg, addr->disp());
@@ -3101,7 +3101,145 @@
 }
 
 void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) {
-  fatal("Type profiling not implemented on this platform");
+  Register obj = op->obj()->as_register();
+  Register tmp1 = op->tmp()->as_pointer_register();
+  Register tmp2 = G1;
+  Address mdo_addr = as_Address(op->mdp()->as_address_ptr());
+  ciKlass* exact_klass = op->exact_klass();
+  intptr_t current_klass = op->current_klass();
+  bool not_null = op->not_null();
+  bool no_conflict = op->no_conflict();
+
+  Label update, next, none;
+
+  bool do_null = !not_null;
+  bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass;
+  bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set;
+
+  assert(do_null || do_update, "why are we here?");
+  assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?");
+
+  __ verify_oop(obj);
+
+  if (tmp1 != obj) {
+    __ mov(obj, tmp1);
+  }
+  if (do_null) {
+    __ br_notnull_short(tmp1, Assembler::pt, update);
+    if (!TypeEntries::was_null_seen(current_klass)) {
+      __ ld_ptr(mdo_addr, tmp1);
+      __ or3(tmp1, TypeEntries::null_seen, tmp1);
+      __ st_ptr(tmp1, mdo_addr);
+    }
+    if (do_update) {
+      __ ba(next);
+      __ delayed()->nop();
+    }
+#ifdef ASSERT
+  } else {
+    __ br_notnull_short(tmp1, Assembler::pt, update);
+    __ stop("unexpect null obj");
+#endif
+  }
+
+  __ bind(update);
+
+  if (do_update) {
+#ifdef ASSERT
+    if (exact_klass != NULL) {
+      Label ok;
+      __ load_klass(tmp1, tmp1);
+      metadata2reg(exact_klass->constant_encoding(), tmp2);
+      __ cmp_and_br_short(tmp1, tmp2, Assembler::equal, Assembler::pt, ok);
+      __ stop("exact klass and actual klass differ");
+      __ bind(ok);
+    }
+#endif
+
+    Label do_update;
+    __ ld_ptr(mdo_addr, tmp2);
+
+    if (!no_conflict) {
+      if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) {
+        if (exact_klass != NULL) {
+          metadata2reg(exact_klass->constant_encoding(), tmp1);
+        } else {
+          __ load_klass(tmp1, tmp1);
+        }
+
+        __ xor3(tmp1, tmp2, tmp1);
+        __ btst(TypeEntries::type_klass_mask, tmp1);
+        // klass seen before, nothing to do. The unknown bit may have been
+        // set already but no need to check.
+        __ brx(Assembler::zero, false, Assembler::pt, next);
+        __ delayed()->
+
+           btst(TypeEntries::type_unknown, tmp1);
+        // already unknown. Nothing to do anymore.
+        __ brx(Assembler::notZero, false, Assembler::pt, next);
+
+        if (TypeEntries::is_type_none(current_klass)) {
+          __ delayed()->btst(TypeEntries::type_mask, tmp2);
+          __ brx(Assembler::zero, true, Assembler::pt, do_update);
+          // first time here. Set profile type.
+          __ delayed()->or3(tmp2, tmp1, tmp2);
+        } else {
+          __ delayed()->nop();
+        }
+      } else {
+        assert(ciTypeEntries::valid_ciklass(current_klass) != NULL &&
+               ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only");
+
+        __ btst(TypeEntries::type_unknown, tmp2);
+        // already unknown. Nothing to do anymore.
+        __ brx(Assembler::notZero, false, Assembler::pt, next);
+        __ delayed()->nop();
+      }
+
+      // different than before. Cannot keep accurate profile.
+      __ or3(tmp2, TypeEntries::type_unknown, tmp2);
+    } else {
+      // There's a single possible klass at this profile point
+      assert(exact_klass != NULL, "should be");
+      if (TypeEntries::is_type_none(current_klass)) {
+        metadata2reg(exact_klass->constant_encoding(), tmp1);
+        __ xor3(tmp1, tmp2, tmp1);
+        __ btst(TypeEntries::type_klass_mask, tmp1);
+        __ brx(Assembler::zero, false, Assembler::pt, next);
+#ifdef ASSERT
+
+        {
+          Label ok;
+          __ delayed()->btst(TypeEntries::type_mask, tmp2);
+          __ brx(Assembler::zero, true, Assembler::pt, ok);
+          __ delayed()->nop();
+
+          __ stop("unexpected profiling mismatch");
+          __ bind(ok);
+        }
+        // first time here. Set profile type.
+        __ or3(tmp2, tmp1, tmp2);
+#else
+        // first time here. Set profile type.
+        __ delayed()->or3(tmp2, tmp1, tmp2);
+#endif
+
+      } else {
+        assert(ciTypeEntries::valid_ciklass(current_klass) != NULL &&
+               ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent");
+
+        // already unknown. Nothing to do anymore.
+        __ btst(TypeEntries::type_unknown, tmp2);
+        __ brx(Assembler::notZero, false, Assembler::pt, next);
+        __ delayed()->or3(tmp2, TypeEntries::type_unknown, tmp2);
+      }
+    }
+
+    __ bind(do_update);
+    __ st_ptr(tmp2, mdo_addr);
+
+    __ bind(next);
+  }
 }
 
 void LIR_Assembler::align_backward_branch_target() {
@@ -3321,9 +3459,14 @@
 
 void LIR_Assembler::leal(LIR_Opr addr_opr, LIR_Opr dest) {
   LIR_Address* addr = addr_opr->as_address_ptr();
-  assert(addr->index()->is_illegal() && addr->scale() == LIR_Address::times_1 && Assembler::is_simm13(addr->disp()), "can't handle complex addresses yet");
-
-  __ add(addr->base()->as_pointer_register(), addr->disp(), dest->as_pointer_register());
+  assert(addr->index()->is_illegal() && addr->scale() == LIR_Address::times_1, "can't handle complex addresses yet");
+
+  if (Assembler::is_simm13(addr->disp())) {
+    __ add(addr->base()->as_pointer_register(), addr->disp(), dest->as_pointer_register());
+  } else {
+    __ set(addr->disp(), G3_scratch);
+    __ add(addr->base()->as_pointer_register(), G3_scratch, dest->as_pointer_register());
+  }
 }

--- a/hotspot/src/cpu/sparc/vm/interp_masm_sparc.cpp	Fri Jan 17 09:40:04 2014 +0100
+++ b/hotspot/src/cpu/sparc/vm/interp_masm_sparc.cpp	Fri Jan 17 10:43:43 2014 -0800
@@ -1892,6 +1892,220 @@
   }
 }
 
+void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr, Register tmp) {
+  Label not_null, do_nothing, do_update;
+
+  assert_different_registers(obj, mdo_addr.base(), tmp);
+
+  verify_oop(obj);
+
+  ld_ptr(mdo_addr, tmp);
+
+  br_notnull_short(obj, pt, not_null);
+  or3(tmp, TypeEntries::null_seen, tmp);
+  ba_short(do_update);
+
+  bind(not_null);
+  load_klass(obj, obj);
+
+  xor3(obj, tmp, obj);
+  btst(TypeEntries::type_klass_mask, obj);
+  // klass seen before, nothing to do. The unknown bit may have been
+  // set already but no need to check.
+  brx(zero, false, pt, do_nothing);
+  delayed()->
+
+  btst(TypeEntries::type_unknown, obj);
+  // already unknown. Nothing to do anymore.
+  brx(notZero, false, pt, do_nothing);
+  delayed()->
+
+  btst(TypeEntries::type_mask, tmp);
+  brx(zero, true, pt, do_update);
+  // first time here. Set profile type.
+  delayed()->or3(tmp, obj, tmp);
+
+  // different than before. Cannot keep accurate profile.
+  or3(tmp, TypeEntries::type_unknown, tmp);
+
+  bind(do_update);
+  // update profile
+  st_ptr(tmp, mdo_addr);
+
+  bind(do_nothing);
+}
+
+void InterpreterMacroAssembler::profile_arguments_type(Register callee, Register tmp1, Register tmp2, bool is_virtual) {
+  if (!ProfileInterpreter) {
+    return;
+  }
+
+  assert_different_registers(callee, tmp1, tmp2, ImethodDataPtr);
+
+  if (MethodData::profile_arguments() || MethodData::profile_return()) {
+    Label profile_continue;
+
+    test_method_data_pointer(profile_continue);
+
+    int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size());
+
+    ldub(ImethodDataPtr, in_bytes(DataLayout::tag_offset()) - off_to_start, tmp1);
+    cmp_and_br_short(tmp1, is_virtual ? DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag, notEqual, pn, profile_continue);
+
+    if (MethodData::profile_arguments()) {
+      Label done;
+      int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset());
+      add(ImethodDataPtr, off_to_args, ImethodDataPtr);
+
+      for (int i = 0; i < TypeProfileArgsLimit; i++) {
+        if (i > 0 || MethodData::profile_return()) {
+          // If return value type is profiled we may have no argument to profile
+          ld_ptr(ImethodDataPtr, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args, tmp1);
+          sub(tmp1, i*TypeStackSlotEntries::per_arg_count(), tmp1);
+          cmp_and_br_short(tmp1, TypeStackSlotEntries::per_arg_count(), less, pn, done);
+        }
+        ld_ptr(Address(callee, Method::const_offset()), tmp1);
+        lduh(Address(tmp1, ConstMethod::size_of_parameters_offset()), tmp1);
+        // stack offset o (zero based) from the start of the argument
+        // list, for n arguments translates into offset n - o - 1 from
+        // the end of the argument list. But there's an extra slot at
+        // the stop of the stack. So the offset is n - o from Lesp.
+        ld_ptr(ImethodDataPtr, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args, tmp2);
+        sub(tmp1, tmp2, tmp1);
+
+        // Can't use MacroAssembler::argument_address() which needs Gargs to be set up
+        sll(tmp1, Interpreter::logStackElementSize, tmp1);
+        ld_ptr(Lesp, tmp1, tmp1);
+
+        Address mdo_arg_addr(ImethodDataPtr, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args);
+        profile_obj_type(tmp1, mdo_arg_addr, tmp2);
+
+        int to_add = in_bytes(TypeStackSlotEntries::per_arg_size());
+        add(ImethodDataPtr, to_add, ImethodDataPtr);
+        off_to_args += to_add;
+      }
+
+      if (MethodData::profile_return()) {
+        ld_ptr(ImethodDataPtr, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args, tmp1);
+        sub(tmp1, TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count(), tmp1);
+      }
+
+      bind(done);
+
+      if (MethodData::profile_return()) {
+        // We're right after the type profile for the last
+        // argument. tmp1 is the number of cells left in the
+        // CallTypeData/VirtualCallTypeData to reach its end. Non null
+        // if there's a return to profile.
+        assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type");
+        sll(tmp1, exact_log2(DataLayout::cell_size), tmp1);
+        add(ImethodDataPtr, tmp1, ImethodDataPtr);
+      }
+    } else {
+      assert(MethodData::profile_return(), "either profile call args or call ret");
+      update_mdp_by_constant(in_bytes(ReturnTypeEntry::size()));
+    }
+
+    // mdp points right after the end of the
+    // CallTypeData/VirtualCallTypeData, right after the cells for the
+    // return value type if there's one.
+
+    bind(profile_continue);
+  }
+}
+
+void InterpreterMacroAssembler::profile_return_type(Register ret, Register tmp1, Register tmp2) {
+  assert_different_registers(ret, tmp1, tmp2);
+  if (ProfileInterpreter && MethodData::profile_return()) {
+    Label profile_continue, done;
+
+    test_method_data_pointer(profile_continue);
+
+    if (MethodData::profile_return_jsr292_only()) {
+      // If we don't profile all invoke bytecodes we must make sure
+      // it's a bytecode we indeed profile. We can't go back to the
+      // begining of the ProfileData we intend to update to check its
+      // type because we're right after it and we don't known its
+      // length.
+      Label do_profile;
+      ldub(Lbcp, 0, tmp1);
+      cmp_and_br_short(tmp1, Bytecodes::_invokedynamic, equal, pn, do_profile);
+      cmp(tmp1, Bytecodes::_invokehandle);
+      br(equal, false, pn, do_profile);
+      delayed()->ldub(Lmethod, Method::intrinsic_id_offset_in_bytes(), tmp1);
+      cmp_and_br_short(tmp1, vmIntrinsics::_compiledLambdaForm, notEqual, pt, profile_continue);
+
+      bind(do_profile);
+    }
+
+    Address mdo_ret_addr(ImethodDataPtr, -in_bytes(ReturnTypeEntry::size()));
+    mov(ret, tmp1);
+    profile_obj_type(tmp1, mdo_ret_addr, tmp2);
+
+    bind(profile_continue);
+  }
+}
+
+void InterpreterMacroAssembler::profile_parameters_type(Register tmp1, Register tmp2, Register tmp3, Register tmp4) {
+  if (ProfileInterpreter && MethodData::profile_parameters()) {
+    Label profile_continue, done;
+
+    test_method_data_pointer(profile_continue);
+
+    // Load the offset of the area within the MDO used for
+    // parameters. If it's negative we're not profiling any parameters.
+    lduw(ImethodDataPtr, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset()), tmp1);
+    cmp_and_br_short(tmp1, 0, less, pn, profile_continue);
+
+    // Compute a pointer to the area for parameters from the offset
+    // and move the pointer to the slot for the last
+    // parameters. Collect profiling from last parameter down.
+    // mdo start + parameters offset + array length - 1
+
+    // Pointer to the parameter area in the MDO
+    Register mdp = tmp1;
+    add(ImethodDataPtr, tmp1, mdp);
+
+    // offset of the current profile entry to update
+    Register entry_offset = tmp2;
+    // entry_offset = array len in number of cells
+    ld_ptr(mdp, ArrayData::array_len_offset(), entry_offset);
+
+    int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0));
+    assert(off_base % DataLayout::cell_size == 0, "should be a number of cells");
+
+    // entry_offset (number of cells)  = array len - size of 1 entry + offset of the stack slot field
+    sub(entry_offset, TypeStackSlotEntries::per_arg_count() - (off_base / DataLayout::cell_size), entry_offset);
+    // entry_offset in bytes
+    sll(entry_offset, exact_log2(DataLayout::cell_size), entry_offset);
+
+    Label loop;
+    bind(loop);
+
+    // load offset on the stack from the slot for this parameter
+    ld_ptr(mdp, entry_offset, tmp3);
+    sll(tmp3,Interpreter::logStackElementSize, tmp3);
+    neg(tmp3);
+    // read the parameter from the local area
+    ld_ptr(Llocals, tmp3, tmp3);
+
+    // make entry_offset now point to the type field for this parameter
+    int type_base = in_bytes(ParametersTypeData::type_offset(0));
+    assert(type_base > off_base, "unexpected");
+    add(entry_offset, type_base - off_base, entry_offset);
+
+    // profile the parameter
+    Address arg_type(mdp, entry_offset);
+    profile_obj_type(tmp3, arg_type, tmp4);
+
+    // go to next parameter
+    sub(entry_offset, TypeStackSlotEntries::per_arg_count() * DataLayout::cell_size + (type_base - off_base), entry_offset);
+    cmp_and_br_short(entry_offset, off_base, greaterEqual, pt, loop);
+
+    bind(profile_continue);
+  }
+}
+
 // add a InterpMonitorElem to stack (see frame_sparc.hpp)
 
 void InterpreterMacroAssembler::add_monitor_to_stack( bool stack_is_empty,

--- a/hotspot/src/cpu/sparc/vm/interp_masm_sparc.hpp	Fri Jan 17 09:40:04 2014 +0100
+++ b/hotspot/src/cpu/sparc/vm/interp_masm_sparc.hpp	Fri Jan 17 10:43:43 2014 -0800
@@ -323,6 +323,11 @@
                            Register scratch2,
                            Register scratch3);
 
+  void profile_obj_type(Register obj, const Address& mdo_addr, Register tmp);
+  void profile_arguments_type(Register callee, Register tmp1, Register tmp2, bool is_virtual);
+  void profile_return_type(Register ret, Register tmp1, Register tmp2);
+  void profile_parameters_type(Register tmp1, Register tmp2, Register tmp3, Register tmp4);
+
   // Debugging
   void interp_verify_oop(Register reg, TosState state, const char * file, int line);    // only if +VerifyOops && state == atos
   void verify_oop_or_return_address(Register reg, Register rtmp); // for astore

--- a/hotspot/src/cpu/sparc/vm/sparc.ad	Fri Jan 17 09:40:04 2014 +0100
+++ b/hotspot/src/cpu/sparc/vm/sparc.ad	Fri Jan 17 10:43:43 2014 -0800
@@ -1848,6 +1848,12 @@
   return false;
 }
 
+// Current (2013) SPARC platforms need to read original key
+// to construct decryption expanded key 
+const bool Matcher::pass_original_key_for_aes() {
+  return true;
+}
+
 // USII supports fxtof through the whole range of number, USIII doesn't
 const bool Matcher::convL2FSupported(void) {
   return VM_Version::has_fast_fxtof();

--- a/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Fri Jan 17 09:40:04 2014 +0100
+++ b/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Fri Jan 17 10:43:43 2014 -0800
@@ -3304,6 +3304,775 @@
     }
   }
 
+  address generate_aescrypt_encryptBlock() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "aesencryptBlock");
+    Label L_doLast128bit, L_storeOutput;
+    address start = __ pc();
+    Register from = O0; // source byte array
+    Register to = O1;   // destination byte array
+    Register key = O2;  // expanded key array
+    const Register keylen = O4; //reg for storing expanded key array length
+
+    // read expanded key length
+    __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0);
+
+    // load input into F54-F56; F30-F31 used as temp
+    __ ldf(FloatRegisterImpl::S, from, 0, F30);
+    __ ldf(FloatRegisterImpl::S, from, 4, F31);
+    __ fmov(FloatRegisterImpl::D, F30, F54);
+    __ ldf(FloatRegisterImpl::S, from, 8, F30);
+    __ ldf(FloatRegisterImpl::S, from, 12, F31);
+    __ fmov(FloatRegisterImpl::D, F30, F56);
+
+    // load expanded key
+    for ( int i = 0;  i <= 38; i += 2 ) {
+      __ ldf(FloatRegisterImpl::D, key, i*4, as_FloatRegister(i));
+    }
+
+    // perform cipher transformation
+    __ fxor(FloatRegisterImpl::D, F0, F54, F54);
+    __ fxor(FloatRegisterImpl::D, F2, F56, F56);
+    // rounds 1 through 8
+    for ( int i = 4;  i <= 28; i += 8 ) {
+      __ aes_eround01(as_FloatRegister(i), F54, F56, F58);
+      __ aes_eround23(as_FloatRegister(i+2), F54, F56, F60);
+      __ aes_eround01(as_FloatRegister(i+4), F58, F60, F54);
+      __ aes_eround23(as_FloatRegister(i+6), F58, F60, F56);
+    }
+    __ aes_eround01(F36, F54, F56, F58); //round 9
+    __ aes_eround23(F38, F54, F56, F60);
+
+    // 128-bit original key size
+    __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pt, L_doLast128bit);
+
+    for ( int i = 40;  i <= 50; i += 2 ) {
+      __ ldf(FloatRegisterImpl::D, key, i*4, as_FloatRegister(i) );
+    }
+    __ aes_eround01(F40, F58, F60, F54); //round 10
+    __ aes_eround23(F42, F58, F60, F56);
+    __ aes_eround01(F44, F54, F56, F58); //round 11
+    __ aes_eround23(F46, F54, F56, F60);
+
+    // 192-bit original key size
+    __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pt, L_storeOutput);
+
+    __ ldf(FloatRegisterImpl::D, key, 208, F52);
+    __ aes_eround01(F48, F58, F60, F54); //round 12
+    __ aes_eround23(F50, F58, F60, F56);
+    __ ldf(FloatRegisterImpl::D, key, 216, F46);
+    __ ldf(FloatRegisterImpl::D, key, 224, F48);
+    __ ldf(FloatRegisterImpl::D, key, 232, F50);
+    __ aes_eround01(F52, F54, F56, F58); //round 13
+    __ aes_eround23(F46, F54, F56, F60);
+    __ br(Assembler::always, false, Assembler::pt, L_storeOutput);
+    __ delayed()->nop();
+
+    __ BIND(L_doLast128bit);
+    __ ldf(FloatRegisterImpl::D, key, 160, F48);
+    __ ldf(FloatRegisterImpl::D, key, 168, F50);
+
+    __ BIND(L_storeOutput);
+    // perform last round of encryption common for all key sizes
+    __ aes_eround01_l(F48, F58, F60, F54); //last round
+    __ aes_eround23_l(F50, F58, F60, F56);
+
+    // store output into the destination array, F0-F1 used as temp
+    __ fmov(FloatRegisterImpl::D, F54, F0);
+    __ stf(FloatRegisterImpl::S, F0, to, 0);
+    __ stf(FloatRegisterImpl::S, F1, to, 4);
+    __ fmov(FloatRegisterImpl::D, F56, F0);
+    __ stf(FloatRegisterImpl::S, F0, to, 8);
+    __ retl();
+    __ delayed()->stf(FloatRegisterImpl::S, F1, to, 12);
+
+    return start;
+  }
+
+  address generate_aescrypt_decryptBlock() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "aesdecryptBlock");
+    address start = __ pc();
+    Label L_expand192bit, L_expand256bit, L_common_transform;
+    Register from = O0; // source byte array
+    Register to = O1;   // destination byte array
+    Register key = O2;  // expanded key array
+    Register original_key = O3;  // original key array only required during decryption
+    const Register keylen = O4;  // reg for storing expanded key array length
+
+    // read expanded key array length
+    __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0);
+
+    // load input into F52-F54; F30,F31 used as temp
+    __ ldf(FloatRegisterImpl::S, from, 0, F30);
+    __ ldf(FloatRegisterImpl::S, from, 4, F31);
+    __ fmov(FloatRegisterImpl::D, F30, F52);
+    __ ldf(FloatRegisterImpl::S, from, 8, F30);
+    __ ldf(FloatRegisterImpl::S, from, 12, F31);
+    __ fmov(FloatRegisterImpl::D, F30, F54);
+
+    // load original key from SunJCE expanded decryption key
+    for ( int i = 0;  i <= 3; i++ ) {
+      __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
+    }
+
+    // 256-bit original key size
+    __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_expand256bit);
+
+    // 192-bit original key size
+    __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_expand192bit);
+
+    // 128-bit original key size
+    // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
+    for ( int i = 0;  i <= 36; i += 4 ) {
+      __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+2), i/4, as_FloatRegister(i+4));
+      __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+4), as_FloatRegister(i+6));
+    }
+
+    // perform 128-bit key specific inverse cipher transformation
+    __ fxor(FloatRegisterImpl::D, F42, F54, F54);
+    __ fxor(FloatRegisterImpl::D, F40, F52, F52);
+    __ br(Assembler::always, false, Assembler::pt, L_common_transform);
+    __ delayed()->nop();
+
+    __ BIND(L_expand192bit);
+
+    // start loading rest of the 192-bit key
+    __ ldf(FloatRegisterImpl::S, original_key, 16, F4);
+    __ ldf(FloatRegisterImpl::S, original_key, 20, F5);
+
+    // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
+    for ( int i = 0;  i <= 36; i += 6 ) {
+      __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+4), i/6, as_FloatRegister(i+6));
+      __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+6), as_FloatRegister(i+8));
+      __ aes_kexpand2(as_FloatRegister(i+4), as_FloatRegister(i+8), as_FloatRegister(i+10));
+    }
+    __ aes_kexpand1(F42, F46, 7, F48);
+    __ aes_kexpand2(F44, F48, F50);
+
+    // perform 192-bit key specific inverse cipher transformation
+    __ fxor(FloatRegisterImpl::D, F50, F54, F54);
+    __ fxor(FloatRegisterImpl::D, F48, F52, F52);
+    __ aes_dround23(F46, F52, F54, F58);
+    __ aes_dround01(F44, F52, F54, F56);
+    __ aes_dround23(F42, F56, F58, F54);
+    __ aes_dround01(F40, F56, F58, F52);
+    __ br(Assembler::always, false, Assembler::pt, L_common_transform);
+    __ delayed()->nop();
+
+    __ BIND(L_expand256bit);
+
+    // load rest of the 256-bit key
+    for ( int i = 4;  i <= 7; i++ ) {
+      __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
+    }
+
+    // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
+    for ( int i = 0;  i <= 40; i += 8 ) {
+      __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+6), i/8, as_FloatRegister(i+8));
+      __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+8), as_FloatRegister(i+10));
+      __ aes_kexpand0(as_FloatRegister(i+4), as_FloatRegister(i+10), as_FloatRegister(i+12));
+      __ aes_kexpand2(as_FloatRegister(i+6), as_FloatRegister(i+12), as_FloatRegister(i+14));
+    }
+    __ aes_kexpand1(F48, F54, 6, F56);
+    __ aes_kexpand2(F50, F56, F58);
+
+    for ( int i = 0;  i <= 6; i += 2 ) {
+      __ fmov(FloatRegisterImpl::D, as_FloatRegister(58-i), as_FloatRegister(i));
+    }
+
+    // load input into F52-F54
+    __ ldf(FloatRegisterImpl::D, from, 0, F52);
+    __ ldf(FloatRegisterImpl::D, from, 8, F54);
+
+    // perform 256-bit key specific inverse cipher transformation
+    __ fxor(FloatRegisterImpl::D, F0, F54, F54);
+    __ fxor(FloatRegisterImpl::D, F2, F52, F52);
+    __ aes_dround23(F4, F52, F54, F58);
+    __ aes_dround01(F6, F52, F54, F56);
+    __ aes_dround23(F50, F56, F58, F54);
+    __ aes_dround01(F48, F56, F58, F52);
+    __ aes_dround23(F46, F52, F54, F58);
+    __ aes_dround01(F44, F52, F54, F56);
+    __ aes_dround23(F42, F56, F58, F54);
+    __ aes_dround01(F40, F56, F58, F52);
+
+    for ( int i = 0;  i <= 7; i++ ) {
+      __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
+    }
+
+    // perform inverse cipher transformations common for all key sizes
+    __ BIND(L_common_transform);
+    for ( int i = 38;  i >= 6; i -= 8 ) {
+      __ aes_dround23(as_FloatRegister(i), F52, F54, F58);
+      __ aes_dround01(as_FloatRegister(i-2), F52, F54, F56);
+      if ( i != 6) {
+        __ aes_dround23(as_FloatRegister(i-4), F56, F58, F54);
+        __ aes_dround01(as_FloatRegister(i-6), F56, F58, F52);
+      } else {
+        __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F54);
+        __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F52);
+      }
+    }
+
+    // store output to destination array, F0-F1 used as temp
+    __ fmov(FloatRegisterImpl::D, F52, F0);
+    __ stf(FloatRegisterImpl::S, F0, to, 0);
+    __ stf(FloatRegisterImpl::S, F1, to, 4);
+    __ fmov(FloatRegisterImpl::D, F54, F0);
+    __ stf(FloatRegisterImpl::S, F0, to, 8);
+    __ retl();
+    __ delayed()->stf(FloatRegisterImpl::S, F1, to, 12);
+
+    return start;
+  }
+
+  address generate_cipherBlockChaining_encryptAESCrypt() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
+    Label L_cbcenc128, L_cbcenc192, L_cbcenc256;
+    address start = __ pc();
+    Register from = O0; // source byte array
+    Register to = O1;   // destination byte array
+    Register key = O2;  // expanded key array
+    Register rvec = O3; // init vector
+    const Register len_reg = O4; // cipher length
+    const Register keylen = O5;  // reg for storing expanded key array length
+
+    // save cipher len to return in the end
+    __ mov(len_reg, L1);
+
+    // read expanded key length
+    __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0);
+
+    // load init vector
+    __ ldf(FloatRegisterImpl::D, rvec, 0, F60);
+    __ ldf(FloatRegisterImpl::D, rvec, 8, F62);
+    __ ldx(key,0,G1);
+    __ ldx(key,8,G2);
+
+    // start loading expanded key
+    for ( int i = 0, j = 16;  i <= 38; i += 2, j += 8 ) {
+      __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i));
+    }
+
+    // 128-bit original key size
+    __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pt, L_cbcenc128);
+
+    for ( int i = 40, j = 176;  i <= 46; i += 2, j += 8 ) {
+      __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i));
+    }
+
+    // 192-bit original key size
+    __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pt, L_cbcenc192);
+
+    for ( int i = 48, j = 208;  i <= 54; i += 2, j += 8 ) {
+      __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i));
+    }
+
+    // 256-bit original key size
+    __ br(Assembler::always, false, Assembler::pt, L_cbcenc256);
+    __ delayed()->nop();
+
+    __ align(OptoLoopAlignment);
+    __ BIND(L_cbcenc128);
+    __ ldx(from,0,G3);
+    __ ldx(from,8,G4);
+    __ xor3(G1,G3,G3);
+    __ xor3(G2,G4,G4);
+    __ movxtod(G3,F56);
+    __ movxtod(G4,F58);
+    __ fxor(FloatRegisterImpl::D, F60, F56, F60);
+    __ fxor(FloatRegisterImpl::D, F62, F58, F62);
+
+    // TEN_EROUNDS
+    for ( int i = 0;  i <= 32; i += 8 ) {
+      __ aes_eround01(as_FloatRegister(i), F60, F62, F56);
+      __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58);
+      if (i != 32 ) {
+        __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60);
+        __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62);
+      } else {
+        __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60);
+        __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62);
+      }
+    }
+
+    __ stf(FloatRegisterImpl::D, F60, to, 0);
+    __ stf(FloatRegisterImpl::D, F62, to, 8);
+    __ add(from, 16, from);
+    __ add(to, 16, to);
+    __ subcc(len_reg, 16, len_reg);
+    __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc128);
+    __ delayed()->nop();
+    __ stf(FloatRegisterImpl::D, F60, rvec, 0);
+    __ stf(FloatRegisterImpl::D, F62, rvec, 8);
+    __ retl();
+    __ delayed()->mov(L1, O0);
+
+    __ align(OptoLoopAlignment);
+    __ BIND(L_cbcenc192);
+    __ ldx(from,0,G3);
+    __ ldx(from,8,G4);
+    __ xor3(G1,G3,G3);
+    __ xor3(G2,G4,G4);
+    __ movxtod(G3,F56);
+    __ movxtod(G4,F58);
+    __ fxor(FloatRegisterImpl::D, F60, F56, F60);
+    __ fxor(FloatRegisterImpl::D, F62, F58, F62);
+
+    // TWELEVE_EROUNDS
+    for ( int i = 0;  i <= 40; i += 8 ) {
+      __ aes_eround01(as_FloatRegister(i), F60, F62, F56);
+      __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58);
+      if (i != 40 ) {
+        __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60);
+        __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62);
+      } else {
+        __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60);
+        __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62);
+      }
+    }
+
+    __ stf(FloatRegisterImpl::D, F60, to, 0);
+    __ stf(FloatRegisterImpl::D, F62, to, 8);
+    __ add(from, 16, from);
+    __ subcc(len_reg, 16, len_reg);
+    __ add(to, 16, to);
+    __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc192);
+    __ delayed()->nop();
+    __ stf(FloatRegisterImpl::D, F60, rvec, 0);
+    __ stf(FloatRegisterImpl::D, F62, rvec, 8);
+    __ retl();
+    __ delayed()->mov(L1, O0);
+
+    __ align(OptoLoopAlignment);
+    __ BIND(L_cbcenc256);
+    __ ldx(from,0,G3);
+    __ ldx(from,8,G4);
+    __ xor3(G1,G3,G3);
+    __ xor3(G2,G4,G4);
+    __ movxtod(G3,F56);
+    __ movxtod(G4,F58);
+    __ fxor(FloatRegisterImpl::D, F60, F56, F60);
+    __ fxor(FloatRegisterImpl::D, F62, F58, F62);
+
+    // FOURTEEN_EROUNDS
+    for ( int i = 0;  i <= 48; i += 8 ) {
+      __ aes_eround01(as_FloatRegister(i), F60, F62, F56);
+      __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58);
+      if (i != 48 ) {
+        __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60);
+        __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62);
+      } else {
+        __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60);
+        __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62);
+      }
+    }
+
+    __ stf(FloatRegisterImpl::D, F60, to, 0);
+    __ stf(FloatRegisterImpl::D, F62, to, 8);
+    __ add(from, 16, from);
+    __ subcc(len_reg, 16, len_reg);
+    __ add(to, 16, to);
+    __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc256);
+    __ delayed()->nop();
+    __ stf(FloatRegisterImpl::D, F60, rvec, 0);
+    __ stf(FloatRegisterImpl::D, F62, rvec, 8);
+    __ retl();
+    __ delayed()->mov(L1, O0);
+
+    return start;
+  }
+
+  address generate_cipherBlockChaining_decryptAESCrypt_Parallel() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
+    Label L_cbcdec_end, L_expand192bit, L_expand256bit, L_dec_first_block_start;
+    Label L_dec_first_block128, L_dec_first_block192, L_dec_next2_blocks128, L_dec_next2_blocks192, L_dec_next2_blocks256;
+    address start = __ pc();
+    Register from = I0; // source byte array
+    Register to = I1;   // destination byte array
+    Register key = I2;  // expanded key array
+    Register rvec = I3; // init vector
+    const Register len_reg = I4; // cipher length
+    const Register original_key = I5;  // original key array only required during decryption
+    const Register keylen = L6;  // reg for storing expanded key array length
+
+    // save cipher len before save_frame, to return in the end
+    __ mov(O4, L0);
+    __ save_frame(0); //args are read from I* registers since we save the frame in the beginning
+
+    // load original key from SunJCE expanded decryption key
+    for ( int i = 0;  i <= 3; i++ ) {
+      __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
+    }
+
+    // load initial vector
+    __ ldx(rvec,0,L0);
+    __ ldx(rvec,8,L1);
+
+    // read expanded key array length
+    __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0);
+
+    // 256-bit original key size
+    __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_expand256bit);
+
+    // 192-bit original key size
+    __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_expand192bit);
+
+    // 128-bit original key size
+    // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
+    for ( int i = 0;  i <= 36; i += 4 ) {
+      __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+2), i/4, as_FloatRegister(i+4));
+      __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+4), as_FloatRegister(i+6));
+    }
+
+    // load expanded key[last-1] and key[last] elements
+    __ movdtox(F40,L2);
+    __ movdtox(F42,L3);
+
+    __ and3(len_reg, 16, L4);
+    __ br_null(L4, false, Assembler::pt, L_dec_next2_blocks128);
+    __ delayed()->nop();
+
+    __ br(Assembler::always, false, Assembler::pt, L_dec_first_block_start);
+    __ delayed()->nop();
+
+    __ BIND(L_expand192bit);
+    // load rest of the 192-bit key
+    __ ldf(FloatRegisterImpl::S, original_key, 16, F4);
+    __ ldf(FloatRegisterImpl::S, original_key, 20, F5);
+
+    // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
+    for ( int i = 0;  i <= 36; i += 6 ) {
+      __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+4), i/6, as_FloatRegister(i+6));
+      __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+6), as_FloatRegister(i+8));
+      __ aes_kexpand2(as_FloatRegister(i+4), as_FloatRegister(i+8), as_FloatRegister(i+10));
+    }
+    __ aes_kexpand1(F42, F46, 7, F48);
+    __ aes_kexpand2(F44, F48, F50);
+
+    // load expanded key[last-1] and key[last] elements
+    __ movdtox(F48,L2);
+    __ movdtox(F50,L3);
+
+    __ and3(len_reg, 16, L4);
+    __ br_null(L4, false, Assembler::pt, L_dec_next2_blocks192);
+    __ delayed()->nop();
+
+    __ br(Assembler::always, false, Assembler::pt, L_dec_first_block_start);
+    __ delayed()->nop();
+
+    __ BIND(L_expand256bit);
+    // load rest of the 256-bit key
+    for ( int i = 4;  i <= 7; i++ ) {
+      __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
+    }
+
+    // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
+    for ( int i = 0;  i <= 40; i += 8 ) {
+      __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+6), i/8, as_FloatRegister(i+8));
+      __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+8), as_FloatRegister(i+10));
+      __ aes_kexpand0(as_FloatRegister(i+4), as_FloatRegister(i+10), as_FloatRegister(i+12));
+      __ aes_kexpand2(as_FloatRegister(i+6), as_FloatRegister(i+12), as_FloatRegister(i+14));
+    }
+    __ aes_kexpand1(F48, F54, 6, F56);
+    __ aes_kexpand2(F50, F56, F58);
+
+    // load expanded key[last-1] and key[last] elements
+    __ movdtox(F56,L2);
+    __ movdtox(F58,L3);
+
+    __ and3(len_reg, 16, L4);
+    __ br_null(L4, false, Assembler::pt, L_dec_next2_blocks256);
+    __ delayed()->nop();
+
+    __ BIND(L_dec_first_block_start);
+    __ ldx(from,0,L4);
+    __ ldx(from,8,L5);
+    __ xor3(L2,L4,G1);
+    __ movxtod(G1,F60);
+    __ xor3(L3,L5,G1);
+    __ movxtod(G1,F62);
+
+    // 128-bit original key size
+    __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pn, L_dec_first_block128);
+
+    // 192-bit original key size
+    __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_dec_first_block192);
+
+    __ aes_dround23(F54, F60, F62, F58);
+    __ aes_dround01(F52, F60, F62, F56);
+    __ aes_dround23(F50, F56, F58, F62);
+    __ aes_dround01(F48, F56, F58, F60);
+
+    __ BIND(L_dec_first_block192);
+    __ aes_dround23(F46, F60, F62, F58);
+    __ aes_dround01(F44, F60, F62, F56);
+    __ aes_dround23(F42, F56, F58, F62);
+    __ aes_dround01(F40, F56, F58, F60);
+
+    __ BIND(L_dec_first_block128);
+    for ( int i = 38;  i >= 6; i -= 8 ) {
+      __ aes_dround23(as_FloatRegister(i), F60, F62, F58);
+      __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56);
+      if ( i != 6) {
+        __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62);
+        __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60);
+      } else {
+        __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62);
+        __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60);
+      }
+    }
+
+    __ movxtod(L0,F56);
+    __ movxtod(L1,F58);
+    __ mov(L4,L0);
+    __ mov(L5,L1);
+    __ fxor(FloatRegisterImpl::D, F56, F60, F60);
+    __ fxor(FloatRegisterImpl::D, F58, F62, F62);
+
+    __ stf(FloatRegisterImpl::D, F60, to, 0);
+    __ stf(FloatRegisterImpl::D, F62, to, 8);
+
+    __ add(from, 16, from);
+    __ add(to, 16, to);
+    __ subcc(len_reg, 16, len_reg);
+    __ br(Assembler::equal, false, Assembler::pt, L_cbcdec_end);
+    __ delayed()->nop();
+
+    // 256-bit original key size
+    __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_dec_next2_blocks256);
+
+    // 192-bit original key size
+    __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_dec_next2_blocks192);
+
+    __ align(OptoLoopAlignment);
+    __ BIND(L_dec_next2_blocks128);
+    __ nop();
+
+    // F40:F42 used for first 16-bytes
+    __ ldx(from,0,G4);
+    __ ldx(from,8,G5);
+    __ xor3(L2,G4,G1);
+    __ movxtod(G1,F40);
+    __ xor3(L3,G5,G1);
+    __ movxtod(G1,F42);
+
+    // F60:F62 used for next 16-bytes
+    __ ldx(from,16,L4);
+    __ ldx(from,24,L5);
+    __ xor3(L2,L4,G1);
+    __ movxtod(G1,F60);
+    __ xor3(L3,L5,G1);
+    __ movxtod(G1,F62);
+
+    for ( int i = 38;  i >= 6; i -= 8 ) {
+      __ aes_dround23(as_FloatRegister(i), F40, F42, F44);
+      __ aes_dround01(as_FloatRegister(i-2), F40, F42, F46);
+      __ aes_dround23(as_FloatRegister(i), F60, F62, F58);
+      __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56);
+      if (i != 6 ) {
+        __ aes_dround23(as_FloatRegister(i-4), F46, F44, F42);
+        __ aes_dround01(as_FloatRegister(i-6), F46, F44, F40);
+        __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62);
+        __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60);
+      } else {
+        __ aes_dround23_l(as_FloatRegister(i-4), F46, F44, F42);
+        __ aes_dround01_l(as_FloatRegister(i-6), F46, F44, F40);
+        __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62);
+        __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60);
+      }
+    }
+
+    __ movxtod(L0,F46);
+    __ movxtod(L1,F44);
+    __ fxor(FloatRegisterImpl::D, F46, F40, F40);
+    __ fxor(FloatRegisterImpl::D, F44, F42, F42);
+
+    __ stf(FloatRegisterImpl::D, F40, to, 0);
+    __ stf(FloatRegisterImpl::D, F42, to, 8);
+
+    __ movxtod(G4,F56);
+    __ movxtod(G5,F58);
+    __ mov(L4,L0);
+    __ mov(L5,L1);
+    __ fxor(FloatRegisterImpl::D, F56, F60, F60);
+    __ fxor(FloatRegisterImpl::D, F58, F62, F62);
+
+    __ stf(FloatRegisterImpl::D, F60, to, 16);
+    __ stf(FloatRegisterImpl::D, F62, to, 24);
+
+    __ add(from, 32, from);
+    __ add(to, 32, to);
+    __ subcc(len_reg, 32, len_reg);
+    __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks128);
+    __ delayed()->nop();
+    __ br(Assembler::always, false, Assembler::pt, L_cbcdec_end);
+    __ delayed()->nop();
+
+    __ align(OptoLoopAlignment);
+    __ BIND(L_dec_next2_blocks192);
+    __ nop();
+
+    // F48:F50 used for first 16-bytes
+    __ ldx(from,0,G4);
+    __ ldx(from,8,G5);
+    __ xor3(L2,G4,G1);
+    __ movxtod(G1,F48);
+    __ xor3(L3,G5,G1);
+    __ movxtod(G1,F50);
+
+    // F60:F62 used for next 16-bytes
+    __ ldx(from,16,L4);
+    __ ldx(from,24,L5);
+    __ xor3(L2,L4,G1);
+    __ movxtod(G1,F60);
+    __ xor3(L3,L5,G1);
+    __ movxtod(G1,F62);
+
+    for ( int i = 46;  i >= 6; i -= 8 ) {
+      __ aes_dround23(as_FloatRegister(i), F48, F50, F52);
+      __ aes_dround01(as_FloatRegister(i-2), F48, F50, F54);
+      __ aes_dround23(as_FloatRegister(i), F60, F62, F58);
+      __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56);
+      if (i != 6 ) {
+        __ aes_dround23(as_FloatRegister(i-4), F54, F52, F50);
+        __ aes_dround01(as_FloatRegister(i-6), F54, F52, F48);
+        __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62);
+        __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60);
+      } else {
+        __ aes_dround23_l(as_FloatRegister(i-4), F54, F52, F50);
+        __ aes_dround01_l(as_FloatRegister(i-6), F54, F52, F48);
+        __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62);
+        __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60);
+      }
+    }
+
+    __ movxtod(L0,F54);
+    __ movxtod(L1,F52);
+    __ fxor(FloatRegisterImpl::D, F54, F48, F48);
+    __ fxor(FloatRegisterImpl::D, F52, F50, F50);
+
+    __ stf(FloatRegisterImpl::D, F48, to, 0);
+    __ stf(FloatRegisterImpl::D, F50, to, 8);
+
+    __ movxtod(G4,F56);
+    __ movxtod(G5,F58);
+    __ mov(L4,L0);
+    __ mov(L5,L1);
+    __ fxor(FloatRegisterImpl::D, F56, F60, F60);
+    __ fxor(FloatRegisterImpl::D, F58, F62, F62);
+
+    __ stf(FloatRegisterImpl::D, F60, to, 16);
+    __ stf(FloatRegisterImpl::D, F62, to, 24);
+
+    __ add(from, 32, from);
+    __ add(to, 32, to);
+    __ subcc(len_reg, 32, len_reg);
+    __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks192);
+    __ delayed()->nop();
+    __ br(Assembler::always, false, Assembler::pt, L_cbcdec_end);
+    __ delayed()->nop();
+
+    __ align(OptoLoopAlignment);
+    __ BIND(L_dec_next2_blocks256);
+    __ nop();
+
+    // F0:F2 used for first 16-bytes
+    __ ldx(from,0,G4);
+    __ ldx(from,8,G5);
+    __ xor3(L2,G4,G1);
+    __ movxtod(G1,F0);
+    __ xor3(L3,G5,G1);
+    __ movxtod(G1,F2);
+
+    // F60:F62 used for next 16-bytes
+    __ ldx(from,16,L4);
+    __ ldx(from,24,L5);
+    __ xor3(L2,L4,G1);
+    __ movxtod(G1,F60);
+    __ xor3(L3,L5,G1);
+    __ movxtod(G1,F62);
+
+    __ aes_dround23(F54, F0, F2, F4);
+    __ aes_dround01(F52, F0, F2, F6);
+    __ aes_dround23(F54, F60, F62, F58);
+    __ aes_dround01(F52, F60, F62, F56);
+    __ aes_dround23(F50, F6, F4, F2);
+    __ aes_dround01(F48, F6, F4, F0);
+    __ aes_dround23(F50, F56, F58, F62);
+    __ aes_dround01(F48, F56, F58, F60);
+    // save F48:F54 in temp registers
+    __ movdtox(F54,G2);
+    __ movdtox(F52,G3);
+    __ movdtox(F50,G6);
+    __ movdtox(F48,G1);
+    for ( int i = 46;  i >= 14; i -= 8 ) {
+      __ aes_dround23(as_FloatRegister(i), F0, F2, F4);
+      __ aes_dround01(as_FloatRegister(i-2), F0, F2, F6);
+      __ aes_dround23(as_FloatRegister(i), F60, F62, F58);
+      __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56);
+      __ aes_dround23(as_FloatRegister(i-4), F6, F4, F2);
+      __ aes_dround01(as_FloatRegister(i-6), F6, F4, F0);
+      __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62);
+      __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60);
+    }
+    // init F48:F54 with F0:F6 values (original key)
+    __ ldf(FloatRegisterImpl::D, original_key, 0, F48);
+    __ ldf(FloatRegisterImpl::D, original_key, 8, F50);
+    __ ldf(FloatRegisterImpl::D, original_key, 16, F52);
+    __ ldf(FloatRegisterImpl::D, original_key, 24, F54);
+    __ aes_dround23(F54, F0, F2, F4);
+    __ aes_dround01(F52, F0, F2, F6);
+    __ aes_dround23(F54, F60, F62, F58);
+    __ aes_dround01(F52, F60, F62, F56);
+    __ aes_dround23_l(F50, F6, F4, F2);
+    __ aes_dround01_l(F48, F6, F4, F0);
+    __ aes_dround23_l(F50, F56, F58, F62);
+    __ aes_dround01_l(F48, F56, F58, F60);
+    // re-init F48:F54 with their original values
+    __ movxtod(G2,F54);
+    __ movxtod(G3,F52);
+    __ movxtod(G6,F50);
+    __ movxtod(G1,F48);
+
+    __ movxtod(L0,F6);
+    __ movxtod(L1,F4);
+    __ fxor(FloatRegisterImpl::D, F6, F0, F0);
+    __ fxor(FloatRegisterImpl::D, F4, F2, F2);
+
+    __ stf(FloatRegisterImpl::D, F0, to, 0);
+    __ stf(FloatRegisterImpl::D, F2, to, 8);
+
+    __ movxtod(G4,F56);
+    __ movxtod(G5,F58);
+    __ mov(L4,L0);
+    __ mov(L5,L1);
+    __ fxor(FloatRegisterImpl::D, F56, F60, F60);
+    __ fxor(FloatRegisterImpl::D, F58, F62, F62);
+
+    __ stf(FloatRegisterImpl::D, F60, to, 16);
+    __ stf(FloatRegisterImpl::D, F62, to, 24);
+
+    __ add(from, 32, from);
+    __ add(to, 32, to);
+    __ subcc(len_reg, 32, len_reg);
+    __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks256);
+    __ delayed()->nop();
+
+    __ BIND(L_cbcdec_end);
+    __ stx(L0, rvec, 0);
+    __ stx(L1, rvec, 8);
+    __ restore();
+    __ mov(L0, O0);
+    __ retl();
+    __ delayed()->nop();
+
+    return start;
+  }
+
   void generate_initial() {
     // Generates all stubs and initializes the entry points
 
@@ -3368,6 +4137,14 @@
     generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
                                                        &StubRoutines::_safefetchN_fault_pc,
                                                        &StubRoutines::_safefetchN_continuation_pc);
+
+    // generate AES intrinsics code
+    if (UseAESIntrinsics) {
+      StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
+      StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
+      StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
+      StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
+    }
   }

--- a/hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp	Fri Jan 17 09:40:04 2014 +0100
+++ b/hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp	Fri Jan 17 10:43:43 2014 -0800
@@ -156,6 +156,10 @@
 address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) {
   address entry = __ pc();
 
+  if (state == atos) {
+    __ profile_return_type(O0, G3_scratch, G1_scratch);
+  }
+
 #if !defined(_LP64) && defined(COMPILER2)
   // All return values are where we want them, except for Longs.  C2 returns
   // longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1.
@@ -1333,6 +1337,7 @@
   __ movbool(true, G3_scratch);
   __ stbool(G3_scratch, do_not_unlock_if_synchronized);
 
+  __ profile_parameters_type(G1_scratch, G3_scratch, G4_scratch, Lscratch);
   // increment invocation counter and check for overflow
   //
   // Note: checking for negative value instead of overflow

--- a/hotspot/src/cpu/sparc/vm/templateTable_sparc.cpp	Fri Jan 17 09:40:04 2014 +0100
+++ b/hotspot/src/cpu/sparc/vm/templateTable_sparc.cpp	Fri Jan 17 10:43:43 2014 -0800
@@ -2942,12 +2942,12 @@
 
 
 void TemplateTable::generate_vtable_call(Register Rrecv, Register Rindex, Register Rret) {
-  Register Rtemp = G4_scratch;
   Register Rcall = Rindex;
   assert_different_registers(Rcall, G5_method, Gargs, Rret);
 
   // get target Method* & entry point
   __ lookup_virtual_method(Rrecv, Rindex, G5_method);
+  __ profile_arguments_type(G5_method, Rcall, Gargs, true);
   __ call_from_interpreter(Rcall, Gargs, Rret);
 }
 
@@ -3022,6 +3022,7 @@
   __ null_check(O0);
 
   __ profile_final_call(O4);
+  __ profile_arguments_type(G5_method, Rscratch, Gargs, true);
 
   // get return address
   AddressLiteral table(Interpreter::invoke_return_entry_table());
@@ -3051,6 +3052,7 @@
 
   // do the call
   __ profile_call(O4);
+  __ profile_arguments_type(G5_method, Rscratch, Gargs, false);
   __ call_from_interpreter(Rscratch, Gargs, Rret);
 }
 
@@ -3066,6 +3068,7 @@
 
   // do the call
   __ profile_call(O4);
+  __ profile_arguments_type(G5_method, Rscratch, Gargs, false);
   __ call_from_interpreter(Rscratch, Gargs, Rret);
 }
 
@@ -3091,6 +3094,7 @@
   // do the call - the index (f2) contains the Method*
   assert_different_registers(G5_method, Gargs, Rcall);
   __ mov(Rindex, G5_method);
+  __ profile_arguments_type(G5_method, Rcall, Gargs, true);
   __ call_from_interpreter(Rcall, Gargs, Rret);
   __ bind(notFinal);
 
@@ -3197,6 +3201,7 @@
   Register Rcall = Rinterface;
   assert_different_registers(Rcall, G5_method, Gargs, Rret);
 
+  __ profile_arguments_type(G5_method, Rcall, Gargs, true);
   __ call_from_interpreter(Rcall, Gargs, Rret);
 }
 
@@ -3226,6 +3231,7 @@
   // do the call
   __ verify_oop(G4_mtype);
   __ profile_final_call(O4);  // FIXME: profile the LambdaForm also
+  __ profile_arguments_type(G5_method, Rscratch, Gargs, true);
   __ call_from_interpreter(Rscratch, Gargs, Rret);
 }
 
@@ -3262,6 +3268,7 @@
 
   // do the call
   __ verify_oop(G4_callsite);
+  __ profile_arguments_type(G5_method, Rscratch, Gargs, false);
   __ call_from_interpreter(Rscratch, Gargs, Rret);
 }

--- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp	Fri Jan 17 09:40:04 2014 +0100
+++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp	Fri Jan 17 10:43:43 2014 -0800
@@ -234,7 +234,7 @@
   assert((OptoLoopAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size");
 
   char buf[512];
-  jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+  jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                (has_v9() ? ", v9" : (has_v8() ? ", v8" : "")),
                (has_hardware_popc() ? ", popc" : ""),
                (has_vis1() ? ", vis1" : ""),
@@ -242,6 +242,7 @@
                (has_vis3() ? ", vis3" : ""),
                (has_blk_init() ? ", blk_init" : ""),
                (has_cbcond() ? ", cbcond" : ""),
+               (has_aes() ? ", aes" : ""),
                (is_ultra3() ? ", ultra3" : ""),
                (is_sun4v() ? ", sun4v" : ""),
                (is_niagara_plus() ? ", niagara_plus" : (is_niagara() ? ", niagara" : "")),
@@ -265,6 +266,41 @@
   if (!has_vis1()) // Drop to 0 if no VIS1 support
     UseVIS = 0;
 
+  // T2 and above should have support for AES instructions
+  if (has_aes()) {
+    if (UseVIS > 0) { // AES intrinsics use FXOR instruction which is VIS1
+      if (FLAG_IS_DEFAULT(UseAES)) {
+        FLAG_SET_DEFAULT(UseAES, true);
+      }
+      if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+        FLAG_SET_DEFAULT(UseAESIntrinsics, true);
+      }
+      // we disable both the AES flags if either of them is disabled on the command line
+      if (!UseAES || !UseAESIntrinsics) {
+        FLAG_SET_DEFAULT(UseAES, false);
+        FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+      }
+    } else {
+        if (UseAES || UseAESIntrinsics) {
+          warning("SPARC AES intrinsics require VIS1 instruction support. Intrinsics will be disabled.");
+          if (UseAES) {
+            FLAG_SET_DEFAULT(UseAES, false);
+          }
+          if (UseAESIntrinsics) {
+            FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+          }
+        }
+    }
+  } else if (UseAES || UseAESIntrinsics) {
+    warning("AES instructions are not available on this CPU");
+    if (UseAES) {
+      FLAG_SET_DEFAULT(UseAES, false);
+    }
+    if (UseAESIntrinsics) {
+      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+    }
+  }
+
   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
     (cache_line_size > ContendedPaddingWidth))
     ContendedPaddingWidth = cache_line_size;

--- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.hpp	Fri Jan 17 09:40:04 2014 +0100
+++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.hpp	Fri Jan 17 10:43:43 2014 -0800
@@ -48,7 +48,8 @@
     sparc64_family       = 14,
     M_family             = 15,
     T_family             = 16,
-    T1_model             = 17
+    T1_model             = 17,
+    aes_instructions     = 18
   };
 
   enum Feature_Flag_Set {
@@ -73,6 +74,7 @@
     M_family_m              = 1 << M_family,
     T_family_m              = 1 << T_family,
     T1_model_m              = 1 << T1_model,
+    aes_instructions_m      = 1 << aes_instructions,
 
     generic_v8_m        = v8_instructions_m | hardware_mul32_m | hardware_div32_m | hardware_fsmuld_m,
     generic_v9_m        = generic_v8_m | v9_instructions_m,
@@ -123,6 +125,7 @@
   static bool has_vis3()                { return (_features & vis3_instructions_m) != 0; }
   static bool has_blk_init()            { return (_features & blk_init_instructions_m) != 0; }
   static bool has_cbcond()              { return (_features & cbcond_instructions_m) != 0; }
+  static bool has_aes()                 { return (_features & aes_instructions_m) != 0; }
 
   static bool supports_compare_and_exchange()
                                         { return has_v9(); }

--- a/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Fri Jan 17 09:40:04 2014 +0100
+++ b/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Fri Jan 17 10:43:43 2014 -0800
@@ -38,6 +38,7 @@
 #include "nativeInst_x86.hpp"
 #include "oops/objArrayKlass.hpp"
 #include "runtime/sharedRuntime.hpp"
+#include "vmreg_x86.inline.hpp"
 
 
 // These masks are used to provide 128-bit aligned bitmasks to the XMM
@@ -1006,6 +1007,9 @@
     if (UseCompressedOops && !wide) {
       __ movptr(compressed_src, src->as_register());
       __ encode_heap_oop(compressed_src);
+      if (patch_code != lir_patch_none) {
+        info->oop_map()->set_narrowoop(compressed_src->as_VMReg());
+      }
     }
 #endif
   }

--- a/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Fri Jan 17 09:40:04 2014 +0100
+++ b/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Fri Jan 17 10:43:43 2014 -0800
@@ -941,6 +941,8 @@
     case vmIntrinsics::_updateCRC32: {
       LIRItem crc(x->argument_at(0), this);
       LIRItem val(x->argument_at(1), this);
+      // val is destroyed by update_crc32
+      val.set_destroys_register();
       crc.load_item();
       val.load_item();
       __ update_crc32(crc.result(), val.result(), result);

--- a/hotspot/src/cpu/x86/vm/interp_masm_x86.cpp	Fri Jan 17 09:40:04 2014 +0100
+++ b/hotspot/src/cpu/x86/vm/interp_masm_x86.cpp	Fri Jan 17 10:43:43 2014 -0800
@@ -127,7 +127,7 @@
 
       if (MethodData::profile_return()) {
         // We're right after the type profile for the last
-        // argument. tmp is the number of cell left in the
+        // argument. tmp is the number of cells left in the
         // CallTypeData/VirtualCallTypeData to reach its end. Non null
         // if there's a return to profile.
         assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type");
@@ -198,7 +198,7 @@
     // parameters. Collect profiling from last parameter down.
     // mdo start + parameters offset + array length - 1
     addptr(mdp, tmp1);
-    movptr(tmp1, Address(mdp, in_bytes(ArrayData::array_len_offset())));
+    movptr(tmp1, Address(mdp, ArrayData::array_len_offset()));
     decrement(tmp1, TypeStackSlotEntries::per_arg_count());
 
     Label loop;

--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Fri Jan 17 09:40:04 2014 +0100
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Fri Jan 17 10:43:43 2014 -0800
@@ -2403,6 +2403,9 @@
   //   c_rarg3   - r vector byte array address
   //   c_rarg4   - input length
   //
+  // Output:
+  //   rax       - input length
+  //
   address generate_cipherBlockChaining_encryptAESCrypt() {
     assert(UseAES, "need AES instructions and misaligned SSE support");
     __ align(CodeEntryAlignment);
@@ -2483,7 +2486,7 @@
     __ movdqu(Address(rvec, 0), xmm_result);     // final value of r stored in rvec of CipherBlockChaining object
 
     handleSOERegisters(false /*restoring*/);
-    __ movl(rax, 0);                             // return 0 (why?)
+    __ movptr(rax, len_param); // return length
     __ leave();                                  // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
 
@@ -2557,6 +2560,9 @@
   //   c_rarg3   - r vector byte array address
   //   c_rarg4   - input length
   //
+  // Output:
+  //   rax       - input length
+  //
 
   address generate_cipherBlockChaining_decryptAESCrypt() {
     assert(UseAES, "need AES instructions and misaligned SSE support");
@@ -2650,7 +2656,7 @@
     __ movptr(rvec , rvec_param);                                     // restore this since used in loop
     __ movdqu(Address(rvec, 0), xmm_temp);                            // final value of r stored in rvec of CipherBlockChaining object
     handleSOERegisters(false /*restoring*/);
-    __ movl(rax, 0);                                                  // return 0 (why?)
+    __ movptr(rax, len_param); // return length
     __ leave();                                                       // required for proper stackwalking of RuntimeStub frame
     __ ret(0);

--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Fri Jan 17 09:40:04 2014 +0100
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Fri Jan 17 10:43:43 2014 -0800
@@ -3217,6 +3217,9 @@
   //   c_rarg3   - r vector byte array address
   //   c_rarg4   - input length
   //
+  // Output:
+  //   rax       - input length
+  //
   address generate_cipherBlockChaining_encryptAESCrypt() {
     assert(UseAES, "need AES instructions and misaligned SSE support");
     __ align(CodeEntryAlignment);
@@ -3232,7 +3235,7 @@
 #ifndef _WIN64
     const Register len_reg     = c_rarg4;  // src len (must be multiple of blocksize 16)
 #else
-    const Address  len_mem(rsp, 6 * wordSize);  // length is on stack on Win64
+    const Address  len_mem(rbp, 6 * wordSize);  // length is on stack on Win64
     const Register len_reg     = r10;      // pick the first volatile windows register
 #endif
     const Register pos         = rax;
@@ -3259,6 +3262,8 @@
     for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
       __ movdqu(xmm_save(i), as_XMMRegister(i));
     }
+#else
+    __ push(len_reg); // Save
 #endif
 
     const XMMRegister xmm_key_shuf_mask = xmm_temp;  // used temporarily to swap key bytes up front
@@ -3301,8 +3306,10 @@
     for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
       __ movdqu(as_XMMRegister(i), xmm_save(i));
     }
+    __ movl(rax, len_mem);
+#else
+    __ pop(rax); // return length
 #endif
-    __ movl(rax, 0); // return 0 (why?)
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
 
@@ -3409,6 +3416,9 @@
   //   c_rarg3   - r vector byte array address
   //   c_rarg4   - input length
   //
+  // Output:
+  //   rax       - input length
+  //
 
   address generate_cipherBlockChaining_decryptAESCrypt_Parallel() {
     assert(UseAES, "need AES instructions and misaligned SSE support");
@@ -3427,7 +3437,7 @@
 #ifndef _WIN64
     const Register len_reg     = c_rarg4;  // src len (must be multiple of blocksize 16)
 #else
-    const Address  len_mem(rsp, 6 * wordSize);  // length is on stack on Win64
+    const Address  len_mem(rbp, 6 * wordSize);  // length is on stack on Win64
     const Register len_reg     = r10;      // pick the first volatile windows register
 #endif
     const Register pos         = rax;
@@ -3448,7 +3458,10 @@
     for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
       __ movdqu(xmm_save(i), as_XMMRegister(i));
     }
+#else
+    __ push(len_reg); // Save
 #endif
+
     // the java expanded key ordering is rotated one position from what we want
     // so we start from 0x10 here and hit 0x00 last
     const XMMRegister xmm_key_shuf_mask = xmm1;  // used temporarily to swap key bytes up front
@@ -3554,8 +3567,10 @@
     for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
       __ movdqu(as_XMMRegister(i), xmm_save(i));
     }
+    __ movl(rax, len_mem);
+#else
+    __ pop(rax); // return length
 #endif
-    __ movl(rax, 0); // return 0 (why?)
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);

--- a/hotspot/src/cpu/x86/vm/x86.ad	Fri Jan 17 09:40:04 2014 +0100
+++ b/hotspot/src/cpu/x86/vm/x86.ad	Fri Jan 17 10:43:43 2014 -0800
@@ -581,6 +581,12 @@
   return !AlignVector; // can be changed by flag
 }
 
+// x86 AES instructions are compatible with SunJCE expanded
+// keys, hence we do not need to pass the original key to stubs
+const bool Matcher::pass_original_key_for_aes() {
+  return false;
+}
+
 // Helper methods for MachSpillCopyNode::implementation().
 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                           int src_hi, int dst_hi, uint ireg, outputStream* st) {

--- a/hotspot/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Fri Jan 17 09:40:04 2014 +0100
+++ b/hotspot/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Fri Jan 17 10:43:43 2014 -0800
@@ -119,6 +119,11 @@
 #endif
     if (av & AV_SPARC_CBCOND)       features |= cbcond_instructions_m;
 
+#ifndef AV_SPARC_AES
+#define AV_SPARC_AES 0x00020000  /* aes instrs supported */
+#endif
+    if (av & AV_SPARC_AES)       features |= aes_instructions_m;
+
   } else {
     // getisax(2) failed, use the old legacy code.
 #ifndef PRODUCT

--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp	Fri Jan 17 09:40:04 2014 +0100
+++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp	Fri Jan 17 10:43:43 2014 -0800
@@ -3288,7 +3288,10 @@
   ciSignature* signature_at_call = NULL;
   x->method()->get_method_at_bci(bci, ignored_will_link, &signature_at_call);
 
-  ciKlass* exact = profile_type(md, 0, md->byte_offset_of_slot(data, ret->type_offset()),
+  // The offset within the MDO of the entry to update may be too large
+  // to be used in load/store instructions on some platforms. So have
+  // profile_type() compute the address of the profile in a register.
+  ciKlass* exact = profile_type(md, md->byte_offset_of_slot(data, ret->type_offset()), 0,
                                 ret->type(), x->ret(), mdp,
                                 !x->needs_null_check(),
                                 signature_at_call->return_type()->as_klass(),

--- a/hotspot/src/share/vm/classfile/vmSymbols.hpp	Fri Jan 17 09:40:04 2014 +0100
+++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp	Fri Jan 17 10:43:43 2014 -0800
@@ -787,7 +787,7 @@
    do_intrinsic(_cipherBlockChaining_decryptAESCrypt, com_sun_crypto_provider_cipherBlockChaining, decrypt_name, byteArray_int_int_byteArray_int_signature, F_R)   \
    do_name(     encrypt_name,                                      "encrypt")                                           \
    do_name(     decrypt_name,                                      "decrypt")                                           \
-   do_signature(byteArray_int_int_byteArray_int_signature,         "([BII[BI)V")                                        \
+   do_signature(byteArray_int_int_byteArray_int_signature,         "([BII[BI)I")                                        \
                                                                                                                         \
   /* support for java.util.zip */                                                                                       \
   do_class(java_util_zip_CRC32,           "java/util/zip/CRC32")                                                        \

--- a/hotspot/src/share/vm/code/codeCache.cpp	Fri Jan 17 09:40:04 2014 +0100
+++ b/hotspot/src/share/vm/code/codeCache.cpp	Fri Jan 17 10:43:43 2014 -0800
@@ -596,20 +596,13 @@
 }
 
 #ifndef PRODUCT
-// used to keep track of how much time is spent in mark_for_deoptimization
+// Keeps track of time spent for checking dependencies
 static elapsedTimer dependentCheckTime;
-static int dependentCheckCount = 0;
-#endif // PRODUCT
+#endif
 
 
 int CodeCache::mark_for_deoptimization(DepChange& changes) {
   MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
-
-#ifndef PRODUCT
-  dependentCheckTime.start();
-  dependentCheckCount++;
-#endif // PRODUCT
-
   int number_of_marked_CodeBlobs = 0;
 
   // search the hierarchy looking for nmethods which are affected by the loading of this class
@@ -617,32 +610,23 @@
   // then search the interfaces this class implements looking for nmethods
   // which might be dependent of the fact that an interface only had one
   // implementor.
-
-  { No_Safepoint_Verifier nsv;
-    for (DepChange::ContextStream str(changes, nsv); str.next(); ) {
-      Klass* d = str.klass();
-      number_of_marked_CodeBlobs += InstanceKlass::cast(d)->mark_dependent_nmethods(changes);
-    }
-  }
-
-  if (VerifyDependencies) {
-    // Turn off dependency tracing while actually testing deps.
-    NOT_PRODUCT( FlagSetting fs(TraceDependencies, false) );
-    FOR_ALL_ALIVE_NMETHODS(nm) {
-      if (!nm->is_marked_for_deoptimization() &&
-          nm->check_all_dependencies()) {
-        ResourceMark rm;
-        tty->print_cr("Should have been marked for deoptimization:");
-        changes.print();
-        nm->print();
-        nm->print_dependencies();
-      }
-    }
+  // nmethod::check_all_dependencies works only correctly, if no safepoint
+  // can happen
+  No_Safepoint_Verifier nsv;
+  for (DepChange::ContextStream str(changes, nsv); str.next(); ) {
+    Klass* d = str.klass();
+    number_of_marked_CodeBlobs += InstanceKlass::cast(d)->mark_dependent_nmethods(changes);
   }
 
 #ifndef PRODUCT
-  dependentCheckTime.stop();
-#endif // PRODUCT
+  if (VerifyDependencies) {
+    // Object pointers are used as unique identifiers for dependency arguments. This
+    // is only possible if no safepoint, i.e., GC occurs during the verification code.
+    dependentCheckTime.start();
+    nmethod::check_all_dependencies(changes);
+    dependentCheckTime.stop();
+  }
+#endif
 
   return number_of_marked_CodeBlobs;
 }
@@ -899,9 +883,7 @@
   }
 
   tty->print_cr("CodeCache:");
-
-  tty->print_cr("nmethod dependency checking time %f", dependentCheckTime.seconds(),
-                dependentCheckTime.seconds() / dependentCheckCount);
+  tty->print_cr("nmethod dependency checking time %fs", dependentCheckTime.seconds());
 
   if (!live.is_empty()) {
     live.print("live");

--- a/hotspot/src/share/vm/code/dependencies.cpp	Fri Jan 17 09:40:04 2014 +0100
+++ b/hotspot/src/share/vm/code/dependencies.cpp	Fri Jan 17 10:43:43 2014 -0800
@@ -678,6 +678,17 @@
   return result;
 }
 
+/**
+ * Returns a unique identifier for each dependency argument.
+ */
+uintptr_t Dependencies::DepStream::get_identifier(int i) {
+  if (has_oop_argument()) {
+    return (uintptr_t)(oopDesc*)argument_oop(i);
+  } else {
+    return (uintptr_t)argument(i);
+  }
+}
+
 oop Dependencies::DepStream::argument_oop(int i) {
   oop result = recorded_oop_at(argument_index(i));
   assert(result == NULL || result->is_oop(), "must be");
@@ -713,6 +724,57 @@
   return NULL;
 }
 
+// ----------------- DependencySignature --------------------------------------
+bool DependencySignature::equals(const DependencySignature& sig) const {
+  if (type() != sig.type()) {
+    return false;
+  }
+
+  if (args_count() != sig.args_count()) {
+    return false;
+  }
+
+  for (int i = 0; i < sig.args_count(); i++) {
+    if (arg(i) != sig.arg(i)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+
+// ----------------- DependencySignatureBuffer --------------------------------------
+DependencySignatureBuffer::DependencySignatureBuffer() {
+  _signatures = NEW_RESOURCE_ARRAY(GrowableArray<DependencySignature*>*, Dependencies::TYPE_LIMIT);
+  memset(_signatures, 0, sizeof(DependencySignature*) * Dependencies::TYPE_LIMIT);
+}
+
+/* Check if arguments are identical. Two dependency signatures are considered
+ * identical, if the type as well as all argument identifiers are identical.
+ * If the dependency has not already been checked, the dependency signature is
+ * added to the checked dependencies of the same type. The function returns
+ * false, which causes the dependency to be checked in the caller.
+ */
+bool DependencySignatureBuffer::add_if_missing(const DependencySignature& sig) {
+  const int index = sig.type();
+  GrowableArray<DependencySignature*>* buffer = _signatures[index];
+  if (buffer == NULL) {
+    buffer = new GrowableArray<DependencySignature*>();
+    _signatures[index] = buffer;
+  }
+
+  // Check if we have already checked the dependency
+  for (int i = 0; i < buffer->length(); i++) {
+    DependencySignature* checked_signature = buffer->at(i);
+    if (checked_signature->equals(sig)) {
+      return true;
+    }
+  }
+  buffer->append((DependencySignature*)&sig);
+  return false;
+}
+
+
 /// Checking dependencies:
 
 // This hierarchy walker inspects subtypes of a given type,

--- a/hotspot/src/share/vm/code/dependencies.hpp	Fri Jan 17 09:40:04 2014 +0100
+++ b/hotspot/src/share/vm/code/dependencies.hpp	Fri Jan 17 10:43:43 2014 -0800
@@ -480,6 +480,9 @@
     bool next();
 
     DepType type()               { return _type; }
+    bool has_oop_argument()      { return type() == call_site_target_value; }
+    uintptr_t get_identifier(int i);
+
     int argument_count()         { return dep_args(type()); }
     int argument_index(int i)    { assert(0 <= i && i < argument_count(), "oob");
                                    return _xi[i]; }
@@ -523,6 +526,38 @@
 };
 
 
+class DependencySignature : public ResourceObj {
+ private:
+  int                   _args_count;
+  uintptr_t             _argument_hash[Dependencies::max_arg_count];
+  Dependencies::DepType _type;
+
+
+ public:
+  DependencySignature(Dependencies::DepStream& dep) {
+    _args_count = dep.argument_count();
+    _type = dep.type();
+    for (int i = 0; i < _args_count; i++) {
+      _argument_hash[i] = dep.get_identifier(i);
+    }
+  }
+
+  bool equals(const DependencySignature& sig) const;
+
+  int args_count()             const { return _args_count; }
+  uintptr_t arg(int idx)       const { return _argument_hash[idx]; }
+  Dependencies::DepType type() const { return _type; }
+};
+
+class DependencySignatureBuffer : public StackObj {
+ private:
+  GrowableArray<DependencySignature*>**  _signatures;
+
+ public:
+  DependencySignatureBuffer();
+  bool add_if_missing(const DependencySignature& sig);
+};
+
 // Every particular DepChange is a sub-class of this class.
 class DepChange : public StackObj {
  public:

--- a/hotspot/src/share/vm/code/nmethod.cpp	Fri Jan 17 09:40:04 2014 +0100
+++ b/hotspot/src/share/vm/code/nmethod.cpp	Fri Jan 17 10:43:43 2014 -0800
@@ -2161,16 +2161,41 @@
 }
 
 
-bool nmethod::check_all_dependencies() {
-  bool found_check = false;
-  // wholesale check of all dependencies
-  for (Dependencies::DepStream deps(this); deps.next(); ) {
-    if (deps.check_dependency() != NULL) {
-      found_check = true;
-      NOT_DEBUG(break);
+void nmethod::check_all_dependencies(DepChange& changes) {
+  // Checked dependencies are allocated into this ResourceMark
+  ResourceMark rm;
+
+  // Turn off dependency tracing while actually testing dependencies.
+  NOT_PRODUCT( FlagSetting fs(TraceDependencies, false) );
+
+  // 'dep_signature_buffers' caches already checked dependencies.
+  DependencySignatureBuffer dep_signature_buffers;
+
+  // Iterate over live nmethods and check dependencies of all nmethods that are not
+  // marked for deoptimization. A particular dependency is only checked once.
+  for(nmethod* nm = CodeCache::alive_nmethod(CodeCache::first()); nm != NULL; nm = CodeCache::alive_nmethod(CodeCache::next(nm))) {
+    if (!nm->is_marked_for_deoptimization()) {
+      for (Dependencies::DepStream deps(nm); deps.next(); ) {
+        // Construct abstraction of a dependency.
+        const DependencySignature* current_sig = new DependencySignature(deps);
+        // Determine if 'deps' is already checked. If it is not checked,
+        // 'add_if_missing()' adds the dependency signature and returns
+        // false.
+        if (!dep_signature_buffers.add_if_missing(*current_sig)) {
+          if (deps.check_dependency() != NULL) {
+            // Dependency checking failed. Print out information about the failed
+            // dependency and finally fail with an assert. We can fail here, since
+            // dependency checking is never done in a product build.
+            ResourceMark rm;
+            changes.print();
+            nm->print();
+            nm->print_dependencies();
+            assert(false, "Should have been marked for deoptimization");
+          }
+        }
+      }
     }
   }
-  return found_check;  // tell caller if we found anything
 }
 
 bool nmethod::check_dependency_on(DepChange& changes) {

--- a/hotspot/src/share/vm/code/nmethod.hpp	Fri Jan 17 09:40:04 2014 +0100
+++ b/hotspot/src/share/vm/code/nmethod.hpp	Fri Jan 17 10:43:43 2014 -0800
@@ -679,7 +679,7 @@
 
   // tells if any of this method's dependencies have been invalidated
   // (this is expensive!)
-  bool check_all_dependencies();
+  static void check_all_dependencies(DepChange& changes);
 
   // tells if this compiled method is dependent on the given changes,
   // and the changes have invalidated it

--- a/hotspot/src/share/vm/opto/cfgnode.cpp	Fri Jan 17 09:40:04 2014 +0100
+++ b/hotspot/src/share/vm/opto/cfgnode.cpp	Fri Jan 17 10:43:43 2014 -0800
@@ -1018,7 +1018,7 @@
           !jtkp->klass_is_exact() && // Keep exact interface klass (6894807)
           ttkp->is_loaded() && !ttkp->klass()->is_interface() ) {
         assert(ft == ttkp->cast_to_ptr_type(jtkp->ptr()) ||
-               ft->isa_narrowoop() && ft->make_ptr() == ttkp->cast_to_ptr_type(jtkp->ptr()), "");
+               ft->isa_narrowklass() && ft->make_ptr() == ttkp->cast_to_ptr_type(jtkp->ptr()), "");
         jt = ft;
       }
     }

--- a/hotspot/src/share/vm/opto/library_call.cpp	Fri Jan 17 09:40:04 2014 +0100
+++ b/hotspot/src/share/vm/opto/library_call.cpp	Fri Jan 17 10:43:43 2014 -0800
@@ -304,6 +304,7 @@
   bool inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id);
   Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting);
   Node* get_key_start_from_aescrypt_object(Node* aescrypt_object);
+  Node* get_original_key_start_from_aescrypt_object(Node* aescrypt_object);
   bool inline_encodeISOArray();
   bool inline_updateCRC32();
   bool inline_updateBytesCRC32();
@@ -5936,10 +5937,22 @@
   Node* k_start = get_key_start_from_aescrypt_object(aescrypt_object);
   if (k_start == NULL) return false;
 
-  // Call the stub.
-  make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::aescrypt_block_Type(),
-                    stubAddr, stubName, TypePtr::BOTTOM,
-                    src_start, dest_start, k_start);
+  if (Matcher::pass_original_key_for_aes()) {
+    // on SPARC we need to pass the original key since key expansion needs to happen in intrinsics due to
+    // compatibility issues between Java key expansion and SPARC crypto instructions
+    Node* original_k_start = get_original_key_start_from_aescrypt_object(aescrypt_object);
+    if (original_k_start == NULL) return false;
+
+    // Call the stub.
+    make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::aescrypt_block_Type(),
+                      stubAddr, stubName, TypePtr::BOTTOM,
+                      src_start, dest_start, k_start, original_k_start);
+  } else {
+    // Call the stub.
+    make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::aescrypt_block_Type(),
+                      stubAddr, stubName, TypePtr::BOTTOM,
+                      src_start, dest_start, k_start);
+  }
 
   return true;
 }
@@ -6017,14 +6030,29 @@
   if (objRvec == NULL) return false;
   Node* r_start = array_element_address(objRvec, intcon(0), T_BYTE);
 
-  // Call the stub, passing src_start, dest_start, k_start, r_start and src_len
-  make_runtime_call(RC_LEAF|RC_NO_FP,
-                    OptoRuntime::cipherBlockChaining_aescrypt_Type(),
-                    stubAddr, stubName, TypePtr::BOTTOM,
-                    src_start, dest_start, k_start, r_start, len);
-
-  // return is void so no result needs to be pushed
-
+  Node* cbcCrypt;
+  if (Matcher::pass_original_key_for_aes()) {
+    // on SPARC we need to pass the original key since key expansion needs to happen in intrinsics due to
+    // compatibility issues between Java key expansion and SPARC crypto instructions
+    Node* original_k_start = get_original_key_start_from_aescrypt_object(aescrypt_object);
+    if (original_k_start == NULL) return false;
+
+    // Call the stub, passing src_start, dest_start, k_start, r_start, src_len and original_k_start
+    cbcCrypt = make_runtime_call(RC_LEAF|RC_NO_FP,
+                                 OptoRuntime::cipherBlockChaining_aescrypt_Type(),
+                                 stubAddr, stubName, TypePtr::BOTTOM,
+                                 src_start, dest_start, k_start, r_start, len, original_k_start);
+  } else {
+    // Call the stub, passing src_start, dest_start, k_start, r_start and src_len
+    cbcCrypt = make_runtime_call(RC_LEAF|RC_NO_FP,
+                                 OptoRuntime::cipherBlockChaining_aescrypt_Type(),
+                                 stubAddr, stubName, TypePtr::BOTTOM,
+                                 src_start, dest_start, k_start, r_start, len);
+  }
+
+  // return cipher length (int)
+  Node* retvalue = _gvn.transform(new (C) ProjNode(cbcCrypt, TypeFunc::Parms));
+  set_result(retvalue);
   return true;
 }
 
@@ -6039,6 +6067,17 @@
   return k_start;
 }
 
+//------------------------------get_original_key_start_from_aescrypt_object-----------------------
+Node * LibraryCallKit::get_original_key_start_from_aescrypt_object(Node *aescrypt_object) {
+  Node* objAESCryptKey = load_field_from_object(aescrypt_object, "lastKey", "[B", /*is_exact*/ false);
+  assert (objAESCryptKey != NULL, "wrong version of com.sun.crypto.provider.AESCrypt");
+  if (objAESCryptKey == NULL) return (Node *) NULL;
+
+  // now have the array, need to get the start address of the lastKey array
+  Node* original_k_start = array_element_address(objAESCryptKey, intcon(0), T_BYTE);
+  return original_k_start;
+}
+
 //----------------------------inline_cipherBlockChaining_AESCrypt_predicate----------------------------
 // Return node representing slow path of predicate check.
 // the pseudo code we want to emulate with this predicate is:

--- a/hotspot/src/share/vm/opto/matcher.hpp	Fri Jan 17 09:40:04 2014 +0100
+++ b/hotspot/src/share/vm/opto/matcher.hpp	Fri Jan 17 10:43:43 2014 -0800
@@ -286,6 +286,9 @@
   // CPU supports misaligned vectors store/load.
   static const bool misaligned_vectors_ok();
 
+  // Should original key array reference be passed to AES stubs
+  static const bool pass_original_key_for_aes();
+
   // Used to determine a "low complexity" 64-bit constant.  (Zero is simple.)
   // The standard of comparison is one (StoreL ConL) vs. two (StoreI ConI).
   // Depends on the details of 64-bit constant generation on the CPU.

--- a/hotspot/src/share/vm/opto/runtime.cpp	Fri Jan 17 09:40:04 2014 +0100
+++ b/hotspot/src/share/vm/opto/runtime.cpp	Fri Jan 17 10:43:43 2014 -0800
@@ -814,12 +814,18 @@
 const TypeFunc* OptoRuntime::aescrypt_block_Type() {
   // create input type (domain)
   int num_args      = 3;
+  if (Matcher::pass_original_key_for_aes()) {
+    num_args = 4;
+  }
   int argcnt = num_args;
   const Type** fields = TypeTuple::fields(argcnt);
   int argp = TypeFunc::Parms;
   fields[argp++] = TypePtr::NOTNULL;    // src
   fields[argp++] = TypePtr::NOTNULL;    // dest
   fields[argp++] = TypePtr::NOTNULL;    // k array
+  if (Matcher::pass_original_key_for_aes()) {
+    fields[argp++] = TypePtr::NOTNULL;    // original k array
+  }
   assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
   const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
 
@@ -856,6 +862,9 @@
 const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() {
   // create input type (domain)
   int num_args      = 5;
+  if (Matcher::pass_original_key_for_aes()) {
+    num_args = 6;
+  }
   int argcnt = num_args;
   const Type** fields = TypeTuple::fields(argcnt);
   int argp = TypeFunc::Parms;
@@ -864,13 +873,16 @@
   fields[argp++] = TypePtr::NOTNULL;    // k array
   fields[argp++] = TypePtr::NOTNULL;    // r array
   fields[argp++] = TypeInt::INT;        // src len
+  if (Matcher::pass_original_key_for_aes()) {
+    fields[argp++] = TypePtr::NOTNULL;    // original k array
+  }
   assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
   const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
 
-  // no result type needed
+  // returning cipher len (int)
   fields = TypeTuple::fields(1);
-  fields[TypeFunc::Parms+0] = NULL; // void
-  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
+  fields[TypeFunc::Parms+0] = TypeInt::INT;
+  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+1, fields);
   return TypeFunc::make(domain, range);
 }

--- a/hotspot/src/share/vm/prims/jvmtiRedefineClasses.cpp	Fri Jan 17 09:40:04 2014 +0100
+++ b/hotspot/src/share/vm/prims/jvmtiRedefineClasses.cpp	Fri Jan 17 10:43:43 2014 -0800
@@ -147,6 +147,9 @@
     _scratch_classes[i] = NULL;
   }
 
+  // Disable any dependent concurrent compilations
+  SystemDictionary::notice_modification();
+
   // Set flag indicating that some invariants are no longer true.
   // See jvmtiExport.hpp for detailed explanation.
   JvmtiExport::set_has_redefined_a_class();

--- a/hotspot/src/share/vm/runtime/arguments.cpp	Fri Jan 17 09:40:04 2014 +0100
+++ b/hotspot/src/share/vm/runtime/arguments.cpp	Fri Jan 17 10:43:43 2014 -0800
@@ -3727,10 +3727,6 @@
     // Doing the replace in parent maps helps speculation
     FLAG_SET_DEFAULT(ReplaceInParentMaps, true);
   }
-#ifndef X86
-  // Only on x86 for now
-  FLAG_SET_DEFAULT(TypeProfileLevel, 0);
-#endif
 #endif
 
   if (PrintAssembly && FLAG_IS_DEFAULT(DebugNonSafepoints)) {

--- a/hotspot/test/compiler/7184394/TestAESMain.java	Fri Jan 17 09:40:04 2014 +0100
+++ b/hotspot/test/compiler/7184394/TestAESMain.java	Fri Jan 17 10:43:43 2014 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2014 Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -39,20 +39,32 @@
     System.out.println(iters + " iterations");
     TestAESEncode etest = new TestAESEncode();
     etest.prepare();
+    // warm-up for 20K iterations
+    System.out.println("Starting encryption warm-up");
+    for (int i=0; i<20000; i++) {
+      etest.run();
+    }
+    System.out.println("Finished encryption warm-up");
     long start = System.nanoTime();
     for (int i=0; i<iters; i++) {
       etest.run();
     }
     long end = System.nanoTime();
-    System.out.println("TestAESEncode runtime was " + (double)((end - start)/1000000000.0) + " ms");
+    System.out.println("TestAESEncode runtime was " + (double)((end - start)/1000000.0) + " ms");
 
     TestAESDecode dtest = new TestAESDecode();
     dtest.prepare();
+    // warm-up for 20K iterations
+    System.out.println("Starting decryption warm-up");
+    for (int i=0; i<20000; i++) {
+      dtest.run();
+    }
+    System.out.println("Finished decryption warm-up");
     start = System.nanoTime();
     for (int i=0; i<iters; i++) {
       dtest.run();
     }
     end = System.nanoTime();
-    System.out.println("TestAESDecode runtime was " + (double)((end - start)/1000000000.0) + " ms");
+    System.out.println("TestAESDecode runtime was " + (double)((end - start)/1000000.0) + " ms");
   }
 }

author	morris
	Fri, 17 Jan 2014 10:43:43 -0800
changeset 22509	29988315e412
parent 22500	b6f6a9f5c5d6 (current diff)
parent 22508	4f8b051ff895 (diff)
child 22510	b19a059832aa
child 22524	f1db7977d906
child 22545	b93a7f0e9b9d