6919069: client compiler needs to capture more profile information for tiered work
author iveresov
Mon, 13 Sep 2010 12:10:49 -0700
changeset 6461 cfc616b49f58
parent 6460 6f5143b00f4c
child 6462 04f64d06050a
6919069: client compiler needs to capture more profile information for tiered work
Summary: Added profiling of instanceof and aastore.
Reviewed-by: kvn, jrose, never
hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp
hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.hpp
hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp
hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp
hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp
hotspot/src/share/vm/c1/c1_Canonicalizer.cpp
hotspot/src/share/vm/c1/c1_GraphBuilder.cpp
hotspot/src/share/vm/c1/c1_Instruction.hpp
hotspot/src/share/vm/c1/c1_LIR.cpp
hotspot/src/share/vm/c1/c1_LIR.hpp
hotspot/src/share/vm/c1/c1_LIRAssembler.hpp
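
At a high level, the old emit_checkcast() is generalized into emit_typecheck_helper(), which branches to caller-supplied success/failure/obj-is-null labels; below is a condensed sketch of how emit_opTypeCheck() is expected to drive it, distilled from the SPARC and x86 hunks that follow (the bodies are illustrative, not the literal committed code).

// Condensed sketch (assumes the HotSpot C1 LIR_Assembler environment shown below).
void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
  LIR_Code code = op->code();
  if (code == lir_checkcast) {
    Register obj = op->object()->as_register();
    Register dst = op->result_opr()->as_register();
    Label success;
    // A null object passes checkcast, so null shares the success label;
    // a failed cast branches to the deopt/throw stub.
    emit_typecheck_helper(op, &success, op->stub()->entry(), &success);
    __ bind(success);
    // ... move obj into dst (platform-specific move) ...
  } else if (code == lir_instanceof) {
    Register dst = op->result_opr()->as_register();
    Label success, failure, done;
    // A null object is "not an instance", so null shares the failure label.
    emit_typecheck_helper(op, &success, &failure, &failure);
    __ bind(failure);   // ... dst = 0, jump to done ...
    __ bind(success);   // ... dst = 1 ...
    __ bind(done);
  } else {
    // lir_store_check keeps its own (now also profiled) inline sequence.
  }
}
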
--- a/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Sat Sep 11 15:21:37 2010 -0700
+++ b/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Mon Sep 13 12:10:49 2010 -0700
@@ -2471,8 +2471,25 @@
   }
 }
 
-void LIR_Assembler::emit_checkcast(LIR_OpTypeCheck *op) {
-  assert(op->code() == lir_checkcast, "Invalid operation");
+
+void LIR_Assembler::setup_md_access(ciMethod* method, int bci,
+                                    ciMethodData*& md, ciProfileData*& data, int& mdo_offset_bias) {
+  md = method->method_data();
+  if (md == NULL) {
+    bailout("out of memory building methodDataOop");
+    return;
+  }
+  data = md->bci_to_data(bci);
+  assert(data != NULL,       "need data for type check");
+  assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check");
+  if (!Assembler::is_simm13(md->byte_offset_of_slot(data, DataLayout::header_offset()) + data->size_in_bytes())) {
+    // The offset is large so bias the mdo by the base of the slot so
+    // that the ld can use simm13s to reference the slots of the data
+    mdo_offset_bias = md->byte_offset_of_slot(data, DataLayout::header_offset());
+  }
+}
+
+void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, Label* failure, Label* obj_is_null) {
   // we always need a stub for the failure case.
   CodeStub* stub = op->stub();
   Register obj = op->object()->as_register();
@@ -2494,25 +2511,10 @@
   if (op->should_profile()) {
     ciMethod* method = op->profiled_method();
     assert(method != NULL, "Should have method");
-    int bci          = op->profiled_bci();
-    md = method->method_data();
-    if (md == NULL) {
-      bailout("out of memory building methodDataOop");
-      return;
-    }
-    data = md->bci_to_data(bci);
-    assert(data != NULL,       "need data for checkcast");
-    assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for checkcast");
-    if (!Assembler::is_simm13(md->byte_offset_of_slot(data, DataLayout::header_offset()) + data->size_in_bytes())) {
-      // The offset is large so bias the mdo by the base of the slot so
-      // that the ld can use simm13s to reference the slots of the data
-      mdo_offset_bias = md->byte_offset_of_slot(data, DataLayout::header_offset());
-    }
-
-    // We need two temporaries to perform this operation on SPARC,
-    // so to keep things simple we perform a redundant test here
-    Label profile_done;
-    __ br_notnull(obj, false, Assembler::pn, profile_done);
+    setup_md_access(method, op->profiled_bci(), md, data, mdo_offset_bias);
+
+    Label not_null;
+    __ br_notnull(obj, false, Assembler::pn, not_null);
     __ delayed()->nop();
     Register mdo      = k_RInfo;
     Register data_val = Rtmp1;
@@ -2525,13 +2527,17 @@
     __ ldub(flags_addr, data_val);
     __ or3(data_val, BitData::null_seen_byte_constant(), data_val);
     __ stb(data_val, flags_addr);
-    __ bind(profile_done);
+    __ ba(false, *obj_is_null);
+    __ delayed()->nop();
+    __ bind(not_null);
+  } else {
+    __ br_null(obj, false, Assembler::pn, *obj_is_null);
+    __ delayed()->nop();
   }
-  Label profile_cast_failure;
-
-  Label done, done_null;
-  // Where to go in case of cast failure
-  Label *failure_target = op->should_profile() ? &profile_cast_failure : stub->entry();
+
+  Label profile_cast_failure, profile_cast_success;
+  Label *failure_target = op->should_profile() ? &profile_cast_failure : failure;
+  Label *success_target = op->should_profile() ? &profile_cast_success : success;
 
   // patching may screw with our temporaries on sparc,
   // so let's do it before loading the class
@@ -2541,8 +2547,6 @@
     jobject2reg_with_patching(k_RInfo, op->info_for_patch());
   }
   assert(obj != k_RInfo, "must be different");
-  __ br_null(obj, false, Assembler::pn, done_null);
-  __ delayed()->nop();
 
   // get object class
   // not a safepoint as obj null check happens earlier
@@ -2559,12 +2563,12 @@
         need_slow_path = false;
       // perform the fast part of the checking logic
       __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, noreg,
-                                       (need_slow_path ? &done : NULL),
+                                       (need_slow_path ? success_target : NULL),
                                        failure_target, NULL,
                                        RegisterOrConstant(k->super_check_offset()));
     } else {
       // perform the fast part of the checking logic
-      __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, O7, &done,
+      __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, O7, success_target,
                                        failure_target, NULL);
     }
     if (need_slow_path) {
@@ -2575,27 +2579,24 @@
       __ cmp(G3, 0);
       __ br(Assembler::equal, false, Assembler::pn, *failure_target);
       __ delayed()->nop();
+      // Fall through to success case
     }
   }
-  __ bind(done);
 
   if (op->should_profile()) {
     Register mdo  = klass_RInfo, recv = k_RInfo, tmp1 = Rtmp1;
     assert_different_registers(obj, mdo, recv, tmp1);
-
+    __ bind(profile_cast_success);
     jobject2reg(md->constant_encoding(), mdo);
     if (mdo_offset_bias > 0) {
       __ set(mdo_offset_bias, tmp1);
       __ add(mdo, tmp1, mdo);
     }
-    Label update_done;
     load(Address(obj, oopDesc::klass_offset_in_bytes()), recv, T_OBJECT);
-    type_profile_helper(mdo, mdo_offset_bias, md, data, recv, tmp1, &update_done);
+    type_profile_helper(mdo, mdo_offset_bias, md, data, recv, tmp1, success);
     // Jump over the failure case
-    __ ba(false, update_done);
+    __ ba(false, *success);
     __ delayed()->nop();
-
-
     // Cast failure case
     __ bind(profile_cast_failure);
     jobject2reg(md->constant_encoding(), mdo);
@@ -2607,17 +2608,13 @@
     __ ld_ptr(data_addr, tmp1);
     __ sub(tmp1, DataLayout::counter_increment, tmp1);
     __ st_ptr(tmp1, data_addr);
-    __ ba(false, *stub->entry());
+    __ ba(false, *failure);
     __ delayed()->nop();
-
-    __ bind(update_done);
   }
-
-  __ bind(done_null);
-  __ mov(obj, dst);
+  __ ba(false, *success);
+  __ delayed()->nop();
 }
 
-
 void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
   LIR_Code code = op->code();
   if (code == lir_store_check) {
@@ -2628,88 +2625,106 @@
     Register Rtmp1 = op->tmp3()->as_register();
 
     __ verify_oop(value);
-
     CodeStub* stub = op->stub();
-    Label done;
-    __ br_null(value, false, Assembler::pn, done);
-    __ delayed()->nop();
+    // check if it needs to be profiled
+    ciMethodData* md;
+    ciProfileData* data;
+    int mdo_offset_bias = 0;
+    if (op->should_profile()) {
+      ciMethod* method = op->profiled_method();
+      assert(method != NULL, "Should have method");
+      setup_md_access(method, op->profiled_bci(), md, data, mdo_offset_bias);
+    }
+    Label profile_cast_success, profile_cast_failure, done;
+    Label *success_target = op->should_profile() ? &profile_cast_success : &done;
+    Label *failure_target = op->should_profile() ? &profile_cast_failure : stub->entry();
+
+    if (op->should_profile()) {
+      Label not_null;
+      __ br_notnull(value, false, Assembler::pn, not_null);
+      __ delayed()->nop();
+      Register mdo      = k_RInfo;
+      Register data_val = Rtmp1;
+      jobject2reg(md->constant_encoding(), mdo);
+      if (mdo_offset_bias > 0) {
+        __ set(mdo_offset_bias, data_val);
+        __ add(mdo, data_val, mdo);
+      }
+      Address flags_addr(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset()) - mdo_offset_bias);
+      __ ldub(flags_addr, data_val);
+      __ or3(data_val, BitData::null_seen_byte_constant(), data_val);
+      __ stb(data_val, flags_addr);
+      __ ba(false, done);
+      __ delayed()->nop();
+      __ bind(not_null);
+    } else {
+      __ br_null(value, false, Assembler::pn, done);
+      __ delayed()->nop();
+    }
     load(array, oopDesc::klass_offset_in_bytes(), k_RInfo, T_OBJECT, op->info_for_exception());
     load(value, oopDesc::klass_offset_in_bytes(), klass_RInfo, T_OBJECT, NULL);
 
     // get instance klass
     load(k_RInfo, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc), k_RInfo, T_OBJECT, NULL);
     // perform the fast part of the checking logic
-    __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, O7, &done, stub->entry(), NULL);
+    __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, O7, success_target, failure_target, NULL);
 
     // call out-of-line instance of __ check_klass_subtype_slow_path(...):
     assert(klass_RInfo == G3 && k_RInfo == G1, "incorrect call setup");
     __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
     __ delayed()->nop();
     __ cmp(G3, 0);
-    __ br(Assembler::equal, false, Assembler::pn, *stub->entry());
+    __ br(Assembler::equal, false, Assembler::pn, *failure_target);
     __ delayed()->nop();
+    // fall through to the success case
+
+    if (op->should_profile()) {
+      Register mdo  = klass_RInfo, recv = k_RInfo, tmp1 = Rtmp1;
+      assert_different_registers(value, mdo, recv, tmp1);
+      __ bind(profile_cast_success);
+      jobject2reg(md->constant_encoding(), mdo);
+      if (mdo_offset_bias > 0) {
+        __ set(mdo_offset_bias, tmp1);
+        __ add(mdo, tmp1, mdo);
+      }
+      load(Address(value, oopDesc::klass_offset_in_bytes()), recv, T_OBJECT);
+      type_profile_helper(mdo, mdo_offset_bias, md, data, recv, tmp1, &done);
+      __ ba(false, done);
+      __ delayed()->nop();
+      // Cast failure case
+      __ bind(profile_cast_failure);
+      jobject2reg(md->constant_encoding(), mdo);
+      if (mdo_offset_bias > 0) {
+        __ set(mdo_offset_bias, tmp1);
+        __ add(mdo, tmp1, mdo);
+      }
+      Address data_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias);
+      __ ld_ptr(data_addr, tmp1);
+      __ sub(tmp1, DataLayout::counter_increment, tmp1);
+      __ st_ptr(tmp1, data_addr);
+      __ ba(false, *stub->entry());
+      __ delayed()->nop();
+    }
     __ bind(done);
+  } else if (code == lir_checkcast) {
+    Register obj = op->object()->as_register();
+    Register dst = op->result_opr()->as_register();
+    Label success;
+    emit_typecheck_helper(op, &success, op->stub()->entry(), &success);
+    __ bind(success);
+    __ mov(obj, dst);
   } else if (code == lir_instanceof) {
     Register obj = op->object()->as_register();
-    Register k_RInfo = op->tmp1()->as_register();
-    Register klass_RInfo = op->tmp2()->as_register();
     Register dst = op->result_opr()->as_register();
-    Register Rtmp1 = op->tmp3()->as_register();
-    ciKlass* k = op->klass();
-
-    Label done;
-    if (obj == k_RInfo) {
-      k_RInfo = klass_RInfo;
-      klass_RInfo = obj;
-    }
-    // patching may screw with our temporaries on sparc,
-    // so let's do it before loading the class
-    if (k->is_loaded()) {
-      jobject2reg(k->constant_encoding(), k_RInfo);
-    } else {
-      jobject2reg_with_patching(k_RInfo, op->info_for_patch());
-    }
-    assert(obj != k_RInfo, "must be different");
-    __ br_null(obj, true, Assembler::pn, done);
-    __ delayed()->set(0, dst);
-
-    // get object class
-    // not a safepoint as obj null check happens earlier
-    load(obj, oopDesc::klass_offset_in_bytes(), klass_RInfo, T_OBJECT, NULL);
-    if (op->fast_check()) {
-      __ cmp(k_RInfo, klass_RInfo);
-      __ brx(Assembler::equal, true, Assembler::pt, done);
-      __ delayed()->set(1, dst);
-      __ set(0, dst);
-      __ bind(done);
-    } else {
-      bool need_slow_path = true;
-      if (k->is_loaded()) {
-        if (k->super_check_offset() != sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())
-          need_slow_path = false;
-        // perform the fast part of the checking logic
-        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, O7, noreg,
-                                         (need_slow_path ? &done : NULL),
-                                         (need_slow_path ? &done : NULL), NULL,
-                                         RegisterOrConstant(k->super_check_offset()),
-                                         dst);
-      } else {
-        assert(dst != klass_RInfo && dst != k_RInfo, "need 3 registers");
-        // perform the fast part of the checking logic
-        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, O7, dst,
-                                         &done, &done, NULL,
-                                         RegisterOrConstant(-1),
-                                         dst);
-      }
-      if (need_slow_path) {
-        // call out-of-line instance of __ check_klass_subtype_slow_path(...):
-        assert(klass_RInfo == G3 && k_RInfo == G1, "incorrect call setup");
-        __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
-        __ delayed()->nop();
-        __ mov(G3, dst);
-      }
-      __ bind(done);
-    }
+    Label success, failure, done;
+    emit_typecheck_helper(op, &success, &failure, &failure);
+    __ bind(failure);
+    __ set(0, dst);
+    __ ba(false, done);
+    __ delayed()->nop();
+    __ bind(success);
+    __ set(1, dst);
+    __ bind(done);
   } else {
     ShouldNotReachHere();
   }
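
The new SPARC-only setup_md_access() exists because SPARC load/store displacements are limited to signed 13-bit immediates; when the profile slot for this bci is too far from the MDO base, the helper hands back a bias that the caller folds into the MDO register. A sketch of the caller-side pattern, condensed from the two call sites above (illustrative only):

// Sketch of the caller-side pattern around setup_md_access() (SPARC only).
ciMethodData* md = NULL;
ciProfileData* data = NULL;
int mdo_offset_bias = 0;
if (op->should_profile()) {
  ciMethod* method = op->profiled_method();
  assert(method != NULL, "Should have method");
  // May call bailout() and leave md == NULL if the methodDataOop is missing.
  setup_md_access(method, op->profiled_bci(), md, data, mdo_offset_bias);
}
// ... later, when touching a profile slot:
//   jobject2reg(md->constant_encoding(), mdo);
//   if (mdo_offset_bias > 0) { __ set(mdo_offset_bias, tmp); __ add(mdo, tmp, mdo); }
//   Address slot(mdo, md->byte_offset_of_slot(data, ...) - mdo_offset_bias);
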
--- a/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.hpp	Sat Sep 11 15:21:37 2010 -0700
+++ b/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.hpp	Mon Sep 13 12:10:49 2010 -0700
@@ -75,6 +75,9 @@
   void type_profile_helper(Register mdo, int mdo_offset_bias,
                            ciMethodData *md, ciProfileData *data,
                            Register recv, Register tmp1, Label* update_done);
+  // Setup pointers to MDO, MDO slot, also compute offset bias to access the slot.
+  void setup_md_access(ciMethod* method, int bci,
+                       ciMethodData*& md, ciProfileData*& data, int& mdo_offset_bias);
  public:
   void   pack64(LIR_Opr src, LIR_Opr dst);
   void unpack64(LIR_Opr src, LIR_Opr dst);
--- a/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp	Sat Sep 11 15:21:37 2010 -0700
+++ b/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp	Mon Sep 13 12:10:49 2010 -0700
@@ -1047,7 +1047,9 @@
   LIR_Opr tmp1 = FrameMap::G1_oop_opr;
   LIR_Opr tmp2 = FrameMap::G3_oop_opr;
   LIR_Opr tmp3 = FrameMap::G4_oop_opr;
-  __ instanceof(out_reg, obj.result(), x->klass(), tmp1, tmp2, tmp3,  x->direct_compare(), patching_info);
+  __ instanceof(out_reg, obj.result(), x->klass(), tmp1, tmp2, tmp3,
+                x->direct_compare(), patching_info,
+                x->profiled_method(), x->profiled_bci());
 }
 
 
--- a/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Sat Sep 11 15:21:37 2010 -0700
+++ b/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Mon Sep 13 12:10:49 2010 -0700
@@ -1624,7 +1624,7 @@
     __ jccb(Assembler::notEqual, next_test);
     Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)));
     __ addptr(data_addr, DataLayout::counter_increment);
-    __ jmpb(*update_done);
+    __ jmp(*update_done);
     __ bind(next_test);
   }
 
@@ -1636,13 +1636,12 @@
     __ jccb(Assembler::notEqual, next_test);
     __ movptr(recv_addr, recv);
     __ movptr(Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))), DataLayout::counter_increment);
-    __ jmpb(*update_done);
+    __ jmp(*update_done);
     __ bind(next_test);
   }
 }
 
-void LIR_Assembler::emit_checkcast(LIR_OpTypeCheck *op) {
-  assert(op->code() == lir_checkcast, "Invalid operation");
+void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, Label* failure, Label* obj_is_null) {
   // we always need a stub for the failure case.
   CodeStub* stub = op->stub();
   Register obj = op->object()->as_register();
@@ -1666,14 +1665,12 @@
       return;
     }
     data = md->bci_to_data(bci);
-    assert(data != NULL,                "need data for checkcast");
-    assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for checkcast");
+    assert(data != NULL,                "need data for type check");
+    assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check");
   }
-  Label profile_cast_failure;
-
-  Label done, done_null;
-  // Where to go in case of cast failure
-  Label *failure_target = op->should_profile() ? &profile_cast_failure : stub->entry();
+  Label profile_cast_success, profile_cast_failure;
+  Label *success_target = op->should_profile() ? &profile_cast_success : success;
+  Label *failure_target = op->should_profile() ? &profile_cast_failure : failure;
 
   if (obj == k_RInfo) {
     k_RInfo = dst;
@@ -1699,23 +1696,23 @@
 
   __ cmpptr(obj, (int32_t)NULL_WORD);
   if (op->should_profile()) {
-    Label profile_done;
-    __ jccb(Assembler::notEqual, profile_done);
-    // Object is null; update methodDataOop
+    Label not_null;
+    __ jccb(Assembler::notEqual, not_null);
+    // Object is null; update MDO and exit
     Register mdo  = klass_RInfo;
     __ movoop(mdo, md->constant_encoding());
     Address data_addr(mdo, md->byte_offset_of_slot(data, DataLayout::header_offset()));
     int header_bits = DataLayout::flag_mask_to_header_mask(BitData::null_seen_byte_constant());
     __ orl(data_addr, header_bits);
-    __ jmp(done_null);
-    __ bind(profile_done);
+    __ jmp(*obj_is_null);
+    __ bind(not_null);
   } else {
-    __ jcc(Assembler::equal, done_null);
+    __ jcc(Assembler::equal, *obj_is_null);
   }
   __ verify_oop(obj);
 
   if (op->fast_check()) {
-    // get object classo
+    // get object class
     // not a safepoint as obj null check happens earlier
     if (k->is_loaded()) {
 #ifdef _LP64
@@ -1727,6 +1724,7 @@
       __ cmpptr(k_RInfo, Address(obj, oopDesc::klass_offset_in_bytes()));
     }
     __ jcc(Assembler::notEqual, *failure_target);
+    // successful cast, fall through to profile or jump
   } else {
     // get object class
     // not a safepoint as obj null check happens earlier
@@ -1740,16 +1738,17 @@
 #endif // _LP64
       if (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() != k->super_check_offset()) {
         __ jcc(Assembler::notEqual, *failure_target);
+        // successful cast, fall through to profile or jump
       } else {
         // See if we get an immediate positive hit
-        __ jcc(Assembler::equal, done);
+        __ jcc(Assembler::equal, *success_target);
         // check for self
 #ifdef _LP64
         __ cmpptr(klass_RInfo, k_RInfo);
 #else
         __ cmpoop(klass_RInfo, k->constant_encoding());
 #endif // _LP64
-        __ jcc(Assembler::equal, done);
+        __ jcc(Assembler::equal, *success_target);
 
         __ push(klass_RInfo);
 #ifdef _LP64
@@ -1763,10 +1762,11 @@
         // result is a boolean
         __ cmpl(klass_RInfo, 0);
         __ jcc(Assembler::equal, *failure_target);
+        // successful cast, fall through to profile or jump
       }
     } else {
       // perform the fast part of the checking logic
-      __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, &done, failure_target, NULL);
+      __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL);
       // call out-of-line instance of __ check_klass_subtype_slow_path(...):
       __ push(klass_RInfo);
       __ push(k_RInfo);
@@ -1776,32 +1776,28 @@
       // result is a boolean
       __ cmpl(k_RInfo, 0);
       __ jcc(Assembler::equal, *failure_target);
+      // successful cast, fall through to profile or jump
     }
   }
-  __ bind(done);
-
   if (op->should_profile()) {
     Register mdo  = klass_RInfo, recv = k_RInfo;
+    __ bind(profile_cast_success);
     __ movoop(mdo, md->constant_encoding());
     __ movptr(recv, Address(obj, oopDesc::klass_offset_in_bytes()));
     Label update_done;
-    type_profile_helper(mdo, md, data, recv, &update_done);
-    __ jmpb(update_done);
+    type_profile_helper(mdo, md, data, recv, success);
+    __ jmp(*success);
 
     __ bind(profile_cast_failure);
     __ movoop(mdo, md->constant_encoding());
     Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
     __ subptr(counter_addr, DataLayout::counter_increment);
-    __ jmp(*stub->entry());
-
-    __ bind(update_done);
+    __ jmp(*failure);
   }
-  __ bind(done_null);
-  if (dst != obj) {
-    __ mov(dst, obj);
-  }
+  __ jmp(*success);
 }
 
+
 void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
   LIR_Code code = op->code();
   if (code == lir_store_check) {
@@ -1812,9 +1808,44 @@
     Register Rtmp1 = op->tmp3()->as_register();
 
     CodeStub* stub = op->stub();
-    Label done;
+
+    // check if it needs to be profiled
+    ciMethodData* md;
+    ciProfileData* data;
+
+    if (op->should_profile()) {
+      ciMethod* method = op->profiled_method();
+      assert(method != NULL, "Should have method");
+      int bci = op->profiled_bci();
+      md = method->method_data();
+      if (md == NULL) {
+        bailout("out of memory building methodDataOop");
+        return;
+      }
+      data = md->bci_to_data(bci);
+      assert(data != NULL,                "need data for type check");
+      assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check");
+    }
+    Label profile_cast_success, profile_cast_failure, done;
+    Label *success_target = op->should_profile() ? &profile_cast_success : &done;
+    Label *failure_target = op->should_profile() ? &profile_cast_failure : stub->entry();
+
     __ cmpptr(value, (int32_t)NULL_WORD);
-    __ jcc(Assembler::equal, done);
+    if (op->should_profile()) {
+      Label not_null;
+      __ jccb(Assembler::notEqual, not_null);
+      // Object is null; update MDO and exit
+      Register mdo  = klass_RInfo;
+      __ movoop(mdo, md->constant_encoding());
+      Address data_addr(mdo, md->byte_offset_of_slot(data, DataLayout::header_offset()));
+      int header_bits = DataLayout::flag_mask_to_header_mask(BitData::null_seen_byte_constant());
+      __ orl(data_addr, header_bits);
+      __ jmp(done);
+      __ bind(not_null);
+    } else {
+      __ jcc(Assembler::equal, done);
+    }
+
     add_debug_info_for_null_check_here(op->info_for_exception());
     __ movptr(k_RInfo, Address(array, oopDesc::klass_offset_in_bytes()));
     __ movptr(klass_RInfo, Address(value, oopDesc::klass_offset_in_bytes()));
@@ -1822,7 +1853,7 @@
     // get instance klass
     __ movptr(k_RInfo, Address(k_RInfo, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)));
     // perform the fast part of the checking logic
-    __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, &done, stub->entry(), NULL);
+    __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL);
     // call out-of-line instance of __ check_klass_subtype_slow_path(...):
     __ push(klass_RInfo);
     __ push(k_RInfo);
@@ -1831,94 +1862,51 @@
     __ pop(k_RInfo);
     // result is a boolean
     __ cmpl(k_RInfo, 0);
-    __ jcc(Assembler::equal, *stub->entry());
-    __ bind(done);
-  } else if (code == lir_instanceof) {
-    Register obj = op->object()->as_register();
-    Register k_RInfo = op->tmp1()->as_register();
-    Register klass_RInfo = op->tmp2()->as_register();
-    Register dst = op->result_opr()->as_register();
-    ciKlass* k = op->klass();
-
-    Label done;
-    Label zero;
-    Label one;
-    if (obj == k_RInfo) {
-      k_RInfo = klass_RInfo;
-      klass_RInfo = obj;
+    __ jcc(Assembler::equal, *failure_target);
+    // fall through to the success case
+
+    if (op->should_profile()) {
+      Register mdo  = klass_RInfo, recv = k_RInfo;
+      __ bind(profile_cast_success);
+      __ movoop(mdo, md->constant_encoding());
+      __ movptr(recv, Address(value, oopDesc::klass_offset_in_bytes()));
+      Label update_done;
+      type_profile_helper(mdo, md, data, recv, &done);
+      __ jmpb(done);
+
+      __ bind(profile_cast_failure);
+      __ movoop(mdo, md->constant_encoding());
+      Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
+      __ subptr(counter_addr, DataLayout::counter_increment);
+      __ jmp(*stub->entry());
     }
-    // patching may screw with our temporaries on sparc,
-    // so let's do it before loading the class
-    if (!k->is_loaded()) {
-      jobject2reg_with_patching(k_RInfo, op->info_for_patch());
-    } else {
-      LP64_ONLY(__ movoop(k_RInfo, k->constant_encoding()));
-    }
-    assert(obj != k_RInfo, "must be different");
-
-    __ verify_oop(obj);
-    if (op->fast_check()) {
-      __ cmpptr(obj, (int32_t)NULL_WORD);
-      __ jcc(Assembler::equal, zero);
-      // get object class
-      // not a safepoint as obj null check happens earlier
-      if (LP64_ONLY(false &&) k->is_loaded()) {
-        NOT_LP64(__ cmpoop(Address(obj, oopDesc::klass_offset_in_bytes()), k->constant_encoding()));
-        k_RInfo = noreg;
-      } else {
-        __ cmpptr(k_RInfo, Address(obj, oopDesc::klass_offset_in_bytes()));
-
+
+    __ bind(done);
+  } else
+    if (code == lir_checkcast) {
+      Register obj = op->object()->as_register();
+      Register dst = op->result_opr()->as_register();
+      Label success;
+      emit_typecheck_helper(op, &success, op->stub()->entry(), &success);
+      __ bind(success);
+      if (dst != obj) {
+        __ mov(dst, obj);
       }
-      __ jcc(Assembler::equal, one);
-    } else {
-      // get object class
-      // not a safepoint as obj null check happens earlier
-      __ cmpptr(obj, (int32_t)NULL_WORD);
-      __ jcc(Assembler::equal, zero);
-      __ movptr(klass_RInfo, Address(obj, oopDesc::klass_offset_in_bytes()));
-
-#ifndef _LP64
-      if (k->is_loaded()) {
-        // See if we get an immediate positive hit
-        __ cmpoop(Address(klass_RInfo, k->super_check_offset()), k->constant_encoding());
-        __ jcc(Assembler::equal, one);
-        if (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() == k->super_check_offset()) {
-          // check for self
-          __ cmpoop(klass_RInfo, k->constant_encoding());
-          __ jcc(Assembler::equal, one);
-          __ push(klass_RInfo);
-          __ pushoop(k->constant_encoding());
-          __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
-          __ pop(klass_RInfo);
-          __ pop(dst);
-          __ jmp(done);
-        }
+    } else
+      if (code == lir_instanceof) {
+        Register obj = op->object()->as_register();
+        Register dst = op->result_opr()->as_register();
+        Label success, failure, done;
+        emit_typecheck_helper(op, &success, &failure, &failure);
+        __ bind(failure);
+        __ xorptr(dst, dst);
+        __ jmpb(done);
+        __ bind(success);
+        __ movptr(dst, 1);
+        __ bind(done);
+      } else {
+        ShouldNotReachHere();
       }
-        else // next block is unconditional if LP64:
-#endif // LP64
-      {
-        assert(dst != klass_RInfo && dst != k_RInfo, "need 3 registers");
-
-        // perform the fast part of the checking logic
-        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, dst, &one, &zero, NULL);
-        // call out-of-line instance of __ check_klass_subtype_slow_path(...):
-        __ push(klass_RInfo);
-        __ push(k_RInfo);
-        __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
-        __ pop(klass_RInfo);
-        __ pop(dst);
-        __ jmp(done);
-      }
-    }
-    __ bind(zero);
-    __ xorptr(dst, dst);
-    __ jmp(done);
-    __ bind(one);
-    __ movptr(dst, 1);
-    __ bind(done);
-  } else {
-    ShouldNotReachHere();
-  }
 
 }
 
--- a/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Sat Sep 11 15:21:37 2010 -0700
+++ b/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Mon Sep 13 12:10:49 2010 -0700
@@ -1156,10 +1156,10 @@
     patching_info = state_for(x, x->state_before());
   }
   obj.load_item();
-  LIR_Opr tmp = new_register(objectType);
   __ instanceof(reg, obj.result(), x->klass(),
-                tmp, new_register(objectType), LIR_OprFact::illegalOpr,
-                x->direct_compare(), patching_info);
+                new_register(objectType), new_register(objectType),
+                !x->klass()->is_loaded() ? new_register(objectType) : LIR_OprFact::illegalOpr,
+                x->direct_compare(), patching_info, x->profiled_method(), x->profiled_bci());
 }
 
 
--- a/hotspot/src/share/vm/c1/c1_Canonicalizer.cpp	Sat Sep 11 15:21:37 2010 -0700
+++ b/hotspot/src/share/vm/c1/c1_Canonicalizer.cpp	Mon Sep 13 12:10:49 2010 -0700
@@ -673,6 +673,8 @@
     } else if (l->as_InstanceOf() != NULL) {
       // NOTE: Code permanently disabled for now since it leaves the old InstanceOf
       //       instruction in the graph (it is pinned). Need to fix this at some point.
+      //       It should also be left in the graph when generating a profiled method version or Goto
+      //       has to know that it was an InstanceOf.
       return;
       // pattern: If ((obj instanceof klass) cond rc) => simplify to: IfInstanceOf or: Goto
       InstanceOf* inst = l->as_InstanceOf();
--- a/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp	Sat Sep 11 15:21:37 2010 -0700
+++ b/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp	Mon Sep 13 12:10:49 2010 -0700
@@ -967,6 +967,17 @@
   StoreIndexed* result = new StoreIndexed(array, index, length, type, value, lock_stack());
   append(result);
   _memory->store_value(value);
+
+  if (type == T_OBJECT && is_profiling()) {
+    // Note that we'd collect profile data in this method if we wanted it.
+    compilation()->set_would_profile(true);
+
+    if (profile_checkcasts()) {
+      result->set_profiled_method(method());
+      result->set_profiled_bci(bci());
+      result->set_should_profile(true);
+    }
+  }
 }
 
 
@@ -1852,6 +1863,17 @@
   InstanceOf* i = new InstanceOf(klass, apop(), state_before);
   ipush(append_split(i));
   i->set_direct_compare(direct_compare(klass));
+
+  if (is_profiling()) {
+    // Note that we'd collect profile data in this method if we wanted it.
+    compilation()->set_would_profile(true);
+
+    if (profile_checkcasts()) {
+      i->set_profiled_method(method());
+      i->set_profiled_bci(bci());
+      i->set_should_profile(true);
+    }
+  }
 }
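
The two GraphBuilder hunks above reuse the guard that check_cast() already applies to CheckCast; as a sketch of the shared pattern (instr is hypothetical shorthand for the StoreIndexed or InstanceOf instruction just appended):

// Sketch of the profiling-attach pattern now shared by aastore (StoreIndexed),
// instanceof (InstanceOf) and the pre-existing checkcast path.
// store_indexed() additionally guards on type == T_OBJECT, since only aastore
// needs a store check worth profiling.
if (is_profiling()) {
  // Record that this method would collect type profiles if asked to.
  compilation()->set_would_profile(true);
  if (profile_checkcasts()) {
    instr->set_profiled_method(method());   // method being parsed
    instr->set_profiled_bci(bci());         // bci of the current bytecode
    instr->set_should_profile(true);        // tells the back end to update the MDO
  }
}
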
 
 
--- a/hotspot/src/share/vm/c1/c1_Instruction.hpp	Sat Sep 11 15:21:37 2010 -0700
+++ b/hotspot/src/share/vm/c1/c1_Instruction.hpp	Mon Sep 13 12:10:49 2010 -0700
@@ -906,11 +906,13 @@
  private:
   Value       _value;
 
+  ciMethod* _profiled_method;
+  int       _profiled_bci;
  public:
   // creation
   StoreIndexed(Value array, Value index, Value length, BasicType elt_type, Value value, ValueStack* lock_stack)
   : AccessIndexed(array, index, length, elt_type, lock_stack)
-  , _value(value)
+  , _value(value), _profiled_method(NULL), _profiled_bci(0)
   {
     set_flag(NeedsWriteBarrierFlag, (as_ValueType(elt_type)->is_object()));
     set_flag(NeedsStoreCheckFlag, (as_ValueType(elt_type)->is_object()));
@@ -923,7 +925,13 @@
   IRScope* scope() const;                        // the state's scope
   bool needs_write_barrier() const               { return check_flag(NeedsWriteBarrierFlag); }
   bool needs_store_check() const                 { return check_flag(NeedsStoreCheckFlag); }
-
+  // Helpers for methodDataOop profiling
+  void set_should_profile(bool value)                { set_flag(ProfileMDOFlag, value); }
+  void set_profiled_method(ciMethod* method)         { _profiled_method = method;   }
+  void set_profiled_bci(int bci)                     { _profiled_bci = bci;         }
+  bool      should_profile() const                   { return check_flag(ProfileMDOFlag); }
+  ciMethod* profiled_method() const                  { return _profiled_method;     }
+  int       profiled_bci() const                     { return _profiled_bci;        }
   // generic
   virtual void input_values_do(ValueVisitor* f)   { AccessIndexed::input_values_do(f); f->visit(&_value); }
 };
@@ -1297,9 +1305,14 @@
   Value       _obj;
   ValueStack* _state_before;
 
+  ciMethod* _profiled_method;
+  int       _profiled_bci;
+
  public:
   // creation
-  TypeCheck(ciKlass* klass, Value obj, ValueType* type, ValueStack* state_before) : StateSplit(type), _klass(klass), _obj(obj), _state_before(state_before) {
+  TypeCheck(ciKlass* klass, Value obj, ValueType* type, ValueStack* state_before)
+  : StateSplit(type), _klass(klass), _obj(obj), _state_before(state_before),
+    _profiled_method(NULL), _profiled_bci(0) {
     ASSERT_VALUES
     set_direct_compare(false);
   }
@@ -1318,20 +1331,22 @@
   virtual bool can_trap() const                  { return true; }
   virtual void input_values_do(ValueVisitor* f)   { StateSplit::input_values_do(f); f->visit(&_obj); }
   virtual void other_values_do(ValueVisitor* f);
+
+  // Helpers for methodDataOop profiling
+  void set_should_profile(bool value)                { set_flag(ProfileMDOFlag, value); }
+  void set_profiled_method(ciMethod* method)         { _profiled_method = method;   }
+  void set_profiled_bci(int bci)                     { _profiled_bci = bci;         }
+  bool      should_profile() const                   { return check_flag(ProfileMDOFlag); }
+  ciMethod* profiled_method() const                  { return _profiled_method;     }
+  int       profiled_bci() const                     { return _profiled_bci;        }
 };
 
 
 LEAF(CheckCast, TypeCheck)
- private:
-  ciMethod* _profiled_method;
-  int       _profiled_bci;
-
  public:
   // creation
   CheckCast(ciKlass* klass, Value obj, ValueStack* state_before)
-  : TypeCheck(klass, obj, objectType, state_before)
-  , _profiled_method(NULL)
-  , _profiled_bci(0) {}
+  : TypeCheck(klass, obj, objectType, state_before) {}
 
   void set_incompatible_class_change_check() {
     set_flag(ThrowIncompatibleClassChangeErrorFlag, true);
@@ -1340,17 +1355,8 @@
     return check_flag(ThrowIncompatibleClassChangeErrorFlag);
   }
 
-  // Helpers for methodDataOop profiling
-  void set_should_profile(bool value)                { set_flag(ProfileMDOFlag, value); }
-  void set_profiled_method(ciMethod* method)         { _profiled_method = method;   }
-  void set_profiled_bci(int bci)                     { _profiled_bci = bci;         }
-  bool      should_profile() const                   { return check_flag(ProfileMDOFlag); }
-  ciMethod* profiled_method() const                  { return _profiled_method;     }
-  int       profiled_bci() const                     { return _profiled_bci;        }
-
   ciType* declared_type() const;
   ciType* exact_type() const;
-
 };
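
With the profiling fields hoisted from CheckCast into TypeCheck, and a parallel copy added to StoreIndexed, any later phase can query them uniformly; a small consumer-side sketch (x is hypothetical shorthand for the instruction being lowered):

// Sketch: uniform query, valid for CheckCast and InstanceOf (both inherit
// from TypeCheck) as well as for StoreIndexed.
if (x->should_profile()) {
  ciMethod* profiled_method = x->profiled_method();
  int       profiled_bci    = x->profiled_bci();
  // ... forwarded into the LIR op (see LIR_List::instanceof below) so the
  // assembler can update the methodDataOop at code-emission time.
}
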
 
 
--- a/hotspot/src/share/vm/c1/c1_LIR.cpp	Sat Sep 11 15:21:37 2010 -0700
+++ b/hotspot/src/share/vm/c1/c1_LIR.cpp	Mon Sep 13 12:10:49 2010 -0700
@@ -1019,11 +1019,7 @@
 }
 
 void LIR_OpTypeCheck::emit_code(LIR_Assembler* masm) {
-  if (code() == lir_checkcast) {
-    masm->emit_checkcast(this);
-  } else {
-    masm->emit_opTypeCheck(this);
-  }
+  masm->emit_opTypeCheck(this);
   if (stub()) {
     masm->emit_code_stub(stub());
   }
@@ -1380,8 +1376,14 @@
   append(c);
 }
 
-void LIR_List::instanceof(LIR_Opr result, LIR_Opr object, ciKlass* klass, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, bool fast_check, CodeEmitInfo* info_for_patch) {
-  append(new LIR_OpTypeCheck(lir_instanceof, result, object, klass, tmp1, tmp2, tmp3, fast_check, NULL, info_for_patch, NULL));
+void LIR_List::instanceof(LIR_Opr result, LIR_Opr object, ciKlass* klass, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, bool fast_check, CodeEmitInfo* info_for_patch, ciMethod* profiled_method, int profiled_bci) {
+  LIR_OpTypeCheck* c = new LIR_OpTypeCheck(lir_instanceof, result, object, klass, tmp1, tmp2, tmp3, fast_check, NULL, info_for_patch, NULL);
+  if (profiled_method != NULL) {
+    c->set_profiled_method(profiled_method);
+    c->set_profiled_bci(profiled_bci);
+    c->set_should_profile(true);
+  }
+  append(c);
 }
 
 
--- a/hotspot/src/share/vm/c1/c1_LIR.hpp	Sat Sep 11 15:21:37 2010 -0700
+++ b/hotspot/src/share/vm/c1/c1_LIR.hpp	Mon Sep 13 12:10:49 2010 -0700
@@ -2041,7 +2041,7 @@
 
   void fpop_raw()                                { append(new LIR_Op0(lir_fpop_raw)); }
 
-  void instanceof(LIR_Opr result, LIR_Opr object, ciKlass* klass, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, bool fast_check, CodeEmitInfo* info_for_patch);
+  void instanceof(LIR_Opr result, LIR_Opr object, ciKlass* klass, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, bool fast_check, CodeEmitInfo* info_for_patch, ciMethod* profiled_method, int profiled_bci);
   void store_check(LIR_Opr object, LIR_Opr array, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, CodeEmitInfo* info_for_exception);
 
   void checkcast (LIR_Opr result, LIR_Opr object, ciKlass* klass,
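
The widened instanceof() signature above is what the SPARC and x86 LIRGenerators call earlier in this changeset; a minimal usage sketch (register/temp choices are illustrative; the x86 generator supplies the third temp only when the klass still needs patching, while the SPARC generator always passes one):

// Sketch: emitting a profiled instanceof from LIRGenerator::do_InstanceOf.
LIR_Opr tmp3 = x->klass()->is_loaded() ? LIR_OprFact::illegalOpr
                                       : new_register(objectType);
__ instanceof(out_reg, obj.result(), x->klass(),
              new_register(objectType), new_register(objectType), tmp3,
              x->direct_compare(), patching_info,
              x->profiled_method(), x->profiled_bci());
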
--- a/hotspot/src/share/vm/c1/c1_LIRAssembler.hpp	Sat Sep 11 15:21:37 2010 -0700
+++ b/hotspot/src/share/vm/c1/c1_LIRAssembler.hpp	Mon Sep 13 12:10:49 2010 -0700
@@ -187,7 +187,7 @@
   void emit_alloc_obj(LIR_OpAllocObj* op);
   void emit_alloc_array(LIR_OpAllocArray* op);
   void emit_opTypeCheck(LIR_OpTypeCheck* op);
-  void emit_checkcast(LIR_OpTypeCheck* op);
+  void emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, Label* failure, Label* obj_is_null);
   void emit_compare_and_swap(LIR_OpCompareAndSwap* op);
   void emit_lock(LIR_OpLock* op);
   void emit_call(LIR_OpJavaCall* op);