hotspot/src/share/vm/interpreter/bytecodeInterpreter.cpp
changeset 22872 b6902ee5bc8d
parent 22234 da823d78ad65
parent 22868 7f6eb436873b
child 22878 206db4a4adcc
--- a/hotspot/src/share/vm/interpreter/bytecodeInterpreter.cpp	Tue Jan 28 11:21:43 2014 -0800
+++ b/hotspot/src/share/vm/interpreter/bytecodeInterpreter.cpp	Tue Jan 28 12:25:34 2014 -0800
@@ -28,14 +28,16 @@
 #include "interpreter/bytecodeHistogram.hpp"
 #include "interpreter/bytecodeInterpreter.hpp"
 #include "interpreter/bytecodeInterpreter.inline.hpp"
+#include "interpreter/bytecodeInterpreterProfiling.hpp"
 #include "interpreter/interpreter.hpp"
 #include "interpreter/interpreterRuntime.hpp"
-#include "memory/cardTableModRefBS.hpp"
 #include "memory/resourceArea.hpp"
 #include "oops/methodCounters.hpp"
 #include "oops/objArrayKlass.hpp"
 #include "oops/oop.inline.hpp"
 #include "prims/jvmtiExport.hpp"
+#include "prims/jvmtiThreadState.hpp"
+#include "runtime/biasedLocking.hpp"
 #include "runtime/frame.inline.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/interfaceSupport.hpp"
@@ -66,6 +68,9 @@
 #ifdef TARGET_OS_ARCH_linux_ppc
 # include "orderAccess_linux_ppc.inline.hpp"
 #endif
+#ifdef TARGET_OS_ARCH_aix_ppc
+# include "orderAccess_aix_ppc.inline.hpp"
+#endif
 #ifdef TARGET_OS_ARCH_bsd_x86
 # include "orderAccess_bsd_x86.inline.hpp"
 #endif
@@ -138,19 +143,20 @@
  * is no entry point to do the transition to vm so we just
  * do it by hand here.
  */
-#define VM_JAVA_ERROR_NO_JUMP(name, msg)                                          \
+#define VM_JAVA_ERROR_NO_JUMP(name, msg, note_a_trap)                             \
     DECACHE_STATE();                                                              \
     SET_LAST_JAVA_FRAME();                                                        \
     {                                                                             \
+       InterpreterRuntime::note_a_trap(THREAD, istate->method(), BCI());          \
        ThreadInVMfromJava trans(THREAD);                                          \
        Exceptions::_throw_msg(THREAD, __FILE__, __LINE__, name, msg);             \
     }                                                                             \
     RESET_LAST_JAVA_FRAME();                                                      \
     CACHE_STATE();
 
-// Normal throw of a java error
-#define VM_JAVA_ERROR(name, msg)                                                  \
-    VM_JAVA_ERROR_NO_JUMP(name, msg)                                              \
+// Normal throw of a java error.
+#define VM_JAVA_ERROR(name, msg, note_a_trap)                                     \
+    VM_JAVA_ERROR_NO_JUMP(name, msg, note_a_trap)                                 \
     goto handle_exception;
 
 #ifdef PRODUCT
@@ -197,6 +203,10 @@
               !THREAD->pop_frame_in_process()) {                                 \
             goto handle_Pop_Frame;                                               \
           }                                                                      \
+          if (THREAD->jvmti_thread_state() &&                                    \
+              THREAD->jvmti_thread_state()->is_earlyret_pending()) {             \
+            goto handle_Early_Return;                                            \
+          }                                                                      \
           opcode = *pc;                                                          \
         }                                                                        \
       }                                                                          \
@@ -332,12 +342,30 @@
       if (UseLoopCounter) {                                                                         \
         bool do_OSR = UseOnStackReplacement;                                                        \
         mcs->backedge_counter()->increment();                                                       \
-        if (do_OSR) do_OSR = mcs->backedge_counter()->reached_InvocationLimit();                    \
+        if (ProfileInterpreter) {                                                                   \
+          BI_PROFILE_GET_OR_CREATE_METHOD_DATA(handle_exception);                                   \
+          /* Check for overflow against MDO count. */                                               \
+          do_OSR = do_OSR                                                                           \
+            && (mdo_last_branch_taken_count >= (uint)InvocationCounter::InterpreterBackwardBranchLimit)\
+            /* When ProfileInterpreter is on, the backedge_count comes     */                       \
+            /* from the methodDataOop, which value does not get reset on   */                       \
+            /* the call to frequency_counter_overflow(). To avoid          */                       \
+            /* excessive calls to the overflow routine while the method is */                       \
+            /* being compiled, add a second test to make sure the overflow */                       \
+            /* function is called only once every overflow_frequency.      */                       \
+            && (!(mdo_last_branch_taken_count & 1023));                                             \
+        } else {                                                                                    \
+          /* check for overflow of backedge counter */                                              \
+          do_OSR = do_OSR                                                                           \
+            && mcs->invocation_counter()->reached_InvocationLimit(mcs->backedge_counter());         \
+        }                                                                                           \
         if (do_OSR) {                                                                               \
-          nmethod*  osr_nmethod;                                                                    \
+          nmethod* osr_nmethod;                                                                     \
           OSR_REQUEST(osr_nmethod, branch_pc);                                                      \
           if (osr_nmethod != NULL && osr_nmethod->osr_entry_bci() != InvalidOSREntryBci) {          \
-            intptr_t* buf = SharedRuntime::OSR_migration_begin(THREAD);                             \
+            intptr_t* buf;                                                                          \
+            /* Call OSR migration with last java frame only, no checks. */                          \
+            CALL_VM_NAKED_LJF(buf=SharedRuntime::OSR_migration_begin(THREAD));                      \
             istate->set_msg(do_osr);                                                                \
             istate->set_osr_buf((address)buf);                                                      \
             istate->set_osr_entry(osr_nmethod->osr_entry());                                        \
@@ -345,7 +373,6 @@
           }                                                                                         \
         }                                                                                           \
       }  /* UseCompiler ... */                                                                      \
-      mcs->invocation_counter()->increment();                                                       \
       SAFEPOINT;                                                                                    \
     }
 
@@ -378,17 +405,21 @@
 #undef CACHE_FRAME
 #define CACHE_FRAME()
 
+// BCI() returns the current bytecode-index.
+#undef  BCI
+#define BCI()           ((int)(intptr_t)(pc - (intptr_t)istate->method()->code_base()))
+
 /*
  * CHECK_NULL - Macro for throwing a NullPointerException if the object
  * passed is a null ref.
  * On some architectures/platforms it should be possible to do this implicitly
  */
 #undef CHECK_NULL
-#define CHECK_NULL(obj_)                                                 \
-    if ((obj_) == NULL) {                                                \
-        VM_JAVA_ERROR(vmSymbols::java_lang_NullPointerException(), "");  \
-    }                                                                    \
-    VERIFY_OOP(obj_)
+#define CHECK_NULL(obj_)                                                                       \
+        if ((obj_) == NULL) {                                                                  \
+          VM_JAVA_ERROR(vmSymbols::java_lang_NullPointerException(), "", note_nullCheck_trap); \
+        }                                                                                      \
+        VERIFY_OOP(obj_)
 
 #define VMdoubleConstZero() 0.0
 #define VMdoubleConstOne() 1.0
@@ -410,22 +441,30 @@
         CACHE_CP();     \
         CACHE_LOCALS();
 
-// Call the VM don't check for pending exceptions
-#define CALL_VM_NOCHECK(func)                                     \
-          DECACHE_STATE();                                        \
-          SET_LAST_JAVA_FRAME();                                  \
-          func;                                                   \
-          RESET_LAST_JAVA_FRAME();                                \
-          CACHE_STATE();                                          \
-          if (THREAD->pop_frame_pending() &&                      \
-              !THREAD->pop_frame_in_process()) {                  \
-            goto handle_Pop_Frame;                                \
-          }
+// Call the VM with last java frame only.
+#define CALL_VM_NAKED_LJF(func)                                    \
+        DECACHE_STATE();                                           \
+        SET_LAST_JAVA_FRAME();                                     \
+        func;                                                      \
+        RESET_LAST_JAVA_FRAME();                                   \
+        CACHE_STATE();
+
+// Call the VM. Don't check for pending exceptions.
+#define CALL_VM_NOCHECK(func)                                      \
+        CALL_VM_NAKED_LJF(func)                                    \
+        if (THREAD->pop_frame_pending() &&                         \
+            !THREAD->pop_frame_in_process()) {                     \
+          goto handle_Pop_Frame;                                   \
+        }                                                          \
+        if (THREAD->jvmti_thread_state() &&                        \
+            THREAD->jvmti_thread_state()->is_earlyret_pending()) { \
+          goto handle_Early_Return;                                \
+        }
 
 // Call the VM and check for pending exceptions
-#define CALL_VM(func, label) {                                    \
-          CALL_VM_NOCHECK(func);                                  \
-          if (THREAD->has_pending_exception()) goto label;        \
+#define CALL_VM(func, label) {                                     \
+          CALL_VM_NOCHECK(func);                                   \
+          if (THREAD->has_pending_exception()) goto label;         \
         }
 
 /*
@@ -502,8 +541,6 @@
   interpreterState orig = istate;
 #endif
 
-  static volatile jbyte* _byte_map_base; // adjusted card table base for oop store barrier
-
   register intptr_t*        topOfStack = (intptr_t *)istate->stack(); /* access with STACK macros */
   register address          pc = istate->bcp();
   register jubyte opcode;
@@ -511,12 +548,9 @@
   register ConstantPoolCache*    cp = istate->constants(); // method()->constants()->cache()
 #ifdef LOTS_OF_REGS
   register JavaThread*      THREAD = istate->thread();
-  register volatile jbyte*  BYTE_MAP_BASE = _byte_map_base;
 #else
 #undef THREAD
 #define THREAD istate->thread()
-#undef BYTE_MAP_BASE
-#define BYTE_MAP_BASE _byte_map_base
 #endif
 
 #ifdef USELABELS
@@ -622,16 +656,20 @@
          topOfStack < istate->stack_base(),
          "Stack top out of range");
 
+#ifdef CC_INTERP_PROFILE
+  // MethodData's last branch taken count.
+  uint mdo_last_branch_taken_count = 0;
+#else
+  const uint mdo_last_branch_taken_count = 0;
+#endif
+
   switch (istate->msg()) {
     case initialize: {
-      if (initialized++) ShouldNotReachHere(); // Only one initialize call
+      if (initialized++) ShouldNotReachHere(); // Only one initialize call.
       _compiling = (UseCompiler || CountCompiledCalls);
 #ifdef VM_JVMTI
       _jvmti_interp_events = JvmtiExport::can_post_interpreter_events();
 #endif
-      BarrierSet* bs = Universe::heap()->barrier_set();
-      assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
-      _byte_map_base = (volatile jbyte*)(((CardTableModRefBS*)bs)->byte_map_base);
       return;
     }
     break;
@@ -646,15 +684,12 @@
           METHOD->increment_interpreter_invocation_count(THREAD);
         }
         mcs->invocation_counter()->increment();
-        if (mcs->invocation_counter()->reached_InvocationLimit()) {
-            CALL_VM((void)InterpreterRuntime::frequency_counter_overflow(THREAD, NULL), handle_exception);
-
-            // We no longer retry on a counter overflow
-
-            // istate->set_msg(retry_method);
-            // THREAD->clr_do_not_unlock();
-            // return;
+        if (mcs->invocation_counter()->reached_InvocationLimit(mcs->backedge_counter())) {
+          CALL_VM((void)InterpreterRuntime::frequency_counter_overflow(THREAD, NULL), handle_exception);
+          // We no longer retry on a counter overflow.
         }
+        // Get or create profile data. Check for pending (async) exceptions.
+        BI_PROFILE_GET_OR_CREATE_METHOD_DATA(handle_exception);
         SAFEPOINT;
       }
 
@@ -676,117 +711,99 @@
       }
 #endif // HACK
 
-
-      // lock method if synchronized
+      // Lock method if synchronized.
       if (METHOD->is_synchronized()) {
-          // oop rcvr = locals[0].j.r;
-          oop rcvr;
-          if (METHOD->is_static()) {
-            rcvr = METHOD->constants()->pool_holder()->java_mirror();
-          } else {
-            rcvr = LOCALS_OBJECT(0);
-            VERIFY_OOP(rcvr);
-          }
-          // The initial monitor is ours for the taking
-          BasicObjectLock* mon = &istate->monitor_base()[-1];
-          oop monobj = mon->obj();
-          assert(mon->obj() == rcvr, "method monitor mis-initialized");
-
-          bool success = UseBiasedLocking;
-          if (UseBiasedLocking) {
-            markOop mark = rcvr->mark();
-            if (mark->has_bias_pattern()) {
-              // The bias pattern is present in the object's header. Need to check
-              // whether the bias owner and the epoch are both still current.
-              intptr_t xx = ((intptr_t) THREAD) ^ (intptr_t) mark;
-              xx = (intptr_t) rcvr->klass()->prototype_header() ^ xx;
-              intptr_t yy = (xx & ~((int) markOopDesc::age_mask_in_place));
-              if (yy != 0 ) {
-                // At this point we know that the header has the bias pattern and
-                // that we are not the bias owner in the current epoch. We need to
-                // figure out more details about the state of the header in order to
-                // know what operations can be legally performed on the object's
-                // header.
-
-                // If the low three bits in the xor result aren't clear, that means
-                // the prototype header is no longer biased and we have to revoke
-                // the bias on this object.
-
-                if (yy & markOopDesc::biased_lock_mask_in_place == 0 ) {
-                  // Biasing is still enabled for this data type. See whether the
-                  // epoch of the current bias is still valid, meaning that the epoch
-                  // bits of the mark word are equal to the epoch bits of the
-                  // prototype header. (Note that the prototype header's epoch bits
-                  // only change at a safepoint.) If not, attempt to rebias the object
-                  // toward the current thread. Note that we must be absolutely sure
-                  // that the current epoch is invalid in order to do this because
-                  // otherwise the manipulations it performs on the mark word are
-                  // illegal.
-                  if (yy & markOopDesc::epoch_mask_in_place == 0) {
-                    // The epoch of the current bias is still valid but we know nothing
-                    // about the owner; it might be set or it might be clear. Try to
-                    // acquire the bias of the object using an atomic operation. If this
-                    // fails we will go in to the runtime to revoke the object's bias.
-                    // Note that we first construct the presumed unbiased header so we
-                    // don't accidentally blow away another thread's valid bias.
-                    intptr_t unbiased = (intptr_t) mark & (markOopDesc::biased_lock_mask_in_place |
-                                                           markOopDesc::age_mask_in_place |
-                                                           markOopDesc::epoch_mask_in_place);
-                    if (Atomic::cmpxchg_ptr((intptr_t)THREAD | unbiased, (intptr_t*) rcvr->mark_addr(), unbiased) != unbiased) {
-                      CALL_VM(InterpreterRuntime::monitorenter(THREAD, mon), handle_exception);
-                    }
-                  } else {
-                    try_rebias:
-                    // At this point we know the epoch has expired, meaning that the
-                    // current "bias owner", if any, is actually invalid. Under these
-                    // circumstances _only_, we are allowed to use the current header's
-                    // value as the comparison value when doing the cas to acquire the
-                    // bias in the current epoch. In other words, we allow transfer of
-                    // the bias from one thread to another directly in this situation.
-                    xx = (intptr_t) rcvr->klass()->prototype_header() | (intptr_t) THREAD;
-                    if (Atomic::cmpxchg_ptr((intptr_t)THREAD | (intptr_t) rcvr->klass()->prototype_header(),
-                                            (intptr_t*) rcvr->mark_addr(),
-                                            (intptr_t) mark) != (intptr_t) mark) {
-                      CALL_VM(InterpreterRuntime::monitorenter(THREAD, mon), handle_exception);
-                    }
-                  }
-                } else {
-                  try_revoke_bias:
-                  // The prototype mark in the klass doesn't have the bias bit set any
-                  // more, indicating that objects of this data type are not supposed
-                  // to be biased any more. We are going to try to reset the mark of
-                  // this object to the prototype value and fall through to the
-                  // CAS-based locking scheme. Note that if our CAS fails, it means
-                  // that another thread raced us for the privilege of revoking the
-                  // bias of this particular object, so it's okay to continue in the
-                  // normal locking code.
-                  //
-                  xx = (intptr_t) rcvr->klass()->prototype_header() | (intptr_t) THREAD;
-                  if (Atomic::cmpxchg_ptr(rcvr->klass()->prototype_header(),
-                                          (intptr_t*) rcvr->mark_addr(),
-                                          mark) == mark) {
-                    // (*counters->revoked_lock_entry_count_addr())++;
-                  success = false;
-                  }
-                }
+        // oop rcvr = locals[0].j.r;
+        oop rcvr;
+        if (METHOD->is_static()) {
+          rcvr = METHOD->constants()->pool_holder()->java_mirror();
+        } else {
+          rcvr = LOCALS_OBJECT(0);
+          VERIFY_OOP(rcvr);
+        }
+        // The initial monitor is ours for the taking.
+        // Monitor not filled in frame manager any longer as this caused race condition with biased locking.
+        BasicObjectLock* mon = &istate->monitor_base()[-1];
+        mon->set_obj(rcvr);
+        bool success = false;
+        uintptr_t epoch_mask_in_place = (uintptr_t)markOopDesc::epoch_mask_in_place;
+        markOop mark = rcvr->mark();
+        intptr_t hash = (intptr_t) markOopDesc::no_hash;
+        // Implies UseBiasedLocking.
+        if (mark->has_bias_pattern()) {
+          uintptr_t thread_ident;
+          uintptr_t anticipated_bias_locking_value;
+          thread_ident = (uintptr_t)istate->thread();
+          anticipated_bias_locking_value =
+            (((uintptr_t)rcvr->klass()->prototype_header() | thread_ident) ^ (uintptr_t)mark) &
+            ~((uintptr_t) markOopDesc::age_mask_in_place);
+
+          if (anticipated_bias_locking_value == 0) {
+            // Already biased towards this thread, nothing to do.
+            if (PrintBiasedLockingStatistics) {
+              (* BiasedLocking::biased_lock_entry_count_addr())++;
+            }
+            success = true;
+          } else if ((anticipated_bias_locking_value & markOopDesc::biased_lock_mask_in_place) != 0) {
+            // Try to revoke bias.
+            markOop header = rcvr->klass()->prototype_header();
+            if (hash != markOopDesc::no_hash) {
+              header = header->copy_set_hash(hash);
+            }
+            if (Atomic::cmpxchg_ptr(header, rcvr->mark_addr(), mark) == mark) {
+              if (PrintBiasedLockingStatistics)
+                (*BiasedLocking::revoked_lock_entry_count_addr())++;
+            }
+          } else if ((anticipated_bias_locking_value & epoch_mask_in_place) != 0) {
+            // Try to rebias.
+            markOop new_header = (markOop) ( (intptr_t) rcvr->klass()->prototype_header() | thread_ident);
+            if (hash != markOopDesc::no_hash) {
+              new_header = new_header->copy_set_hash(hash);
+            }
+            if (Atomic::cmpxchg_ptr((void*)new_header, rcvr->mark_addr(), mark) == mark) {
+              if (PrintBiasedLockingStatistics) {
+                (* BiasedLocking::rebiased_lock_entry_count_addr())++;
               }
             } else {
-              cas_label:
-              success = false;
+              CALL_VM(InterpreterRuntime::monitorenter(THREAD, mon), handle_exception);
+            }
+            success = true;
+          } else {
+            // Try to bias towards thread in case object is anonymously biased.
+            markOop header = (markOop) ((uintptr_t) mark &
+                                        ((uintptr_t)markOopDesc::biased_lock_mask_in_place |
+                                         (uintptr_t)markOopDesc::age_mask_in_place | epoch_mask_in_place));
+            if (hash != markOopDesc::no_hash) {
+              header = header->copy_set_hash(hash);
+            }
+            markOop new_header = (markOop) ((uintptr_t) header | thread_ident);
+            // Debugging hint.
+            DEBUG_ONLY(mon->lock()->set_displaced_header((markOop) (uintptr_t) 0xdeaddead);)
+            if (Atomic::cmpxchg_ptr((void*)new_header, rcvr->mark_addr(), header) == header) {
+              if (PrintBiasedLockingStatistics) {
+                (* BiasedLocking::anonymously_biased_lock_entry_count_addr())++;
+              }
+            } else {
+              CALL_VM(InterpreterRuntime::monitorenter(THREAD, mon), handle_exception);
+            }
+            success = true;
+          }
+        }
+
+        // Traditional lightweight locking.
+        if (!success) {
+          markOop displaced = rcvr->mark()->set_unlocked();
+          mon->lock()->set_displaced_header(displaced);
+          bool call_vm = UseHeavyMonitors;
+          if (call_vm || Atomic::cmpxchg_ptr(mon, rcvr->mark_addr(), displaced) != displaced) {
+            // Is it simple recursive case?
+            if (!call_vm && THREAD->is_lock_owned((address) displaced->clear_lock_bits())) {
+              mon->lock()->set_displaced_header(NULL);
+            } else {
+              CALL_VM(InterpreterRuntime::monitorenter(THREAD, mon), handle_exception);
             }
           }
-          if (!success) {
-            markOop displaced = rcvr->mark()->set_unlocked();
-            mon->lock()->set_displaced_header(displaced);
-            if (Atomic::cmpxchg_ptr(mon, rcvr->mark_addr(), displaced) != displaced) {
-              // Is it simple recursive case?
-              if (THREAD->is_lock_owned((address) displaced->clear_lock_bits())) {
-                mon->lock()->set_displaced_header(NULL);
-              } else {
-                CALL_VM(InterpreterRuntime::monitorenter(THREAD, mon), handle_exception);
-              }
-            }
-          }
+        }
       }
       THREAD->clr_do_not_unlock();
 
@@ -808,9 +825,14 @@
     case popping_frame: {
       // returned from a java call to pop the frame, restart the call
       // clear the message so we don't confuse ourselves later
-      ShouldNotReachHere();  // we don't return this.
       assert(THREAD->pop_frame_in_process(), "wrong frame pop state");
       istate->set_msg(no_request);
+      if (_compiling) {
+        // Set MDX back to the ProfileData of the invoke bytecode that will be
+        // restarted.
+        SET_MDX(NULL);
+        BI_PROFILE_GET_OR_CREATE_METHOD_DATA(handle_exception);
+      }
       THREAD->clr_pop_frame_in_process();
       goto run;
     }
@@ -836,10 +858,19 @@
       if (THREAD->pop_frame_pending() && !THREAD->pop_frame_in_process()) {
         goto handle_Pop_Frame;
       }
+      if (THREAD->jvmti_thread_state() &&
+          THREAD->jvmti_thread_state()->is_earlyret_pending()) {
+        goto handle_Early_Return;
+      }
 
       if (THREAD->has_pending_exception()) goto handle_exception;
       // Update the pc by the saved amount of the invoke bytecode size
       UPDATE_PC(istate->bcp_advance());
+
+      if (_compiling) {
+        // Get or create profile data. Check for pending (async) exceptions.
+        BI_PROFILE_GET_OR_CREATE_METHOD_DATA(handle_exception);
+      }
       goto run;
     }
 
@@ -847,6 +878,11 @@
       // Returned from an opcode that will reexecute. Deopt was
       // a result of a PopFrame request.
       //
+
+      if (_compiling) {
+        // Get or create profile data. Check for pending (async) exceptions.
+        BI_PROFILE_GET_OR_CREATE_METHOD_DATA(handle_exception);
+      }
       goto run;
     }
 
@@ -869,6 +905,11 @@
       }
       UPDATE_PC(Bytecodes::length_at(METHOD, pc));
       if (THREAD->has_pending_exception()) goto handle_exception;
+
+      if (_compiling) {
+        // Get or create profile data. Check for pending (async) exceptions.
+        BI_PROFILE_GET_OR_CREATE_METHOD_DATA(handle_exception);
+      }
       goto run;
     }
     case got_monitors: {
@@ -881,15 +922,84 @@
       BasicObjectLock* entry = (BasicObjectLock*) istate->stack_base();
       assert(entry->obj() == NULL, "Frame manager didn't allocate the monitor");
       entry->set_obj(lockee);
-
-      markOop displaced = lockee->mark()->set_unlocked();
-      entry->lock()->set_displaced_header(displaced);
-      if (Atomic::cmpxchg_ptr(entry, lockee->mark_addr(), displaced) != displaced) {
-        // Is it simple recursive case?
-        if (THREAD->is_lock_owned((address) displaced->clear_lock_bits())) {
-          entry->lock()->set_displaced_header(NULL);
+      bool success = false;
+      uintptr_t epoch_mask_in_place = (uintptr_t)markOopDesc::epoch_mask_in_place;
+
+      markOop mark = lockee->mark();
+      intptr_t hash = (intptr_t) markOopDesc::no_hash;
+      // implies UseBiasedLocking
+      if (mark->has_bias_pattern()) {
+        uintptr_t thread_ident;
+        uintptr_t anticipated_bias_locking_value;
+        thread_ident = (uintptr_t)istate->thread();
+        anticipated_bias_locking_value =
+          (((uintptr_t)lockee->klass()->prototype_header() | thread_ident) ^ (uintptr_t)mark) &
+          ~((uintptr_t) markOopDesc::age_mask_in_place);
+
+        if  (anticipated_bias_locking_value == 0) {
+          // already biased towards this thread, nothing to do
+          if (PrintBiasedLockingStatistics) {
+            (* BiasedLocking::biased_lock_entry_count_addr())++;
+          }
+          success = true;
+        } else if ((anticipated_bias_locking_value & markOopDesc::biased_lock_mask_in_place) != 0) {
+          // try revoke bias
+          markOop header = lockee->klass()->prototype_header();
+          if (hash != markOopDesc::no_hash) {
+            header = header->copy_set_hash(hash);
+          }
+          if (Atomic::cmpxchg_ptr(header, lockee->mark_addr(), mark) == mark) {
+            if (PrintBiasedLockingStatistics) {
+              (*BiasedLocking::revoked_lock_entry_count_addr())++;
+            }
+          }
+        } else if ((anticipated_bias_locking_value & epoch_mask_in_place) !=0) {
+          // try rebias
+          markOop new_header = (markOop) ( (intptr_t) lockee->klass()->prototype_header() | thread_ident);
+          if (hash != markOopDesc::no_hash) {
+                new_header = new_header->copy_set_hash(hash);
+          }
+          if (Atomic::cmpxchg_ptr((void*)new_header, lockee->mark_addr(), mark) == mark) {
+            if (PrintBiasedLockingStatistics) {
+              (* BiasedLocking::rebiased_lock_entry_count_addr())++;
+            }
+          } else {
+            CALL_VM(InterpreterRuntime::monitorenter(THREAD, entry), handle_exception);
+          }
+          success = true;
         } else {
-          CALL_VM(InterpreterRuntime::monitorenter(THREAD, entry), handle_exception);
+          // try to bias towards thread in case object is anonymously biased
+          markOop header = (markOop) ((uintptr_t) mark & ((uintptr_t)markOopDesc::biased_lock_mask_in_place |
+                                                          (uintptr_t)markOopDesc::age_mask_in_place | epoch_mask_in_place));
+          if (hash != markOopDesc::no_hash) {
+            header = header->copy_set_hash(hash);
+          }
+          markOop new_header = (markOop) ((uintptr_t) header | thread_ident);
+          // debugging hint
+          DEBUG_ONLY(entry->lock()->set_displaced_header((markOop) (uintptr_t) 0xdeaddead);)
+          if (Atomic::cmpxchg_ptr((void*)new_header, lockee->mark_addr(), header) == header) {
+            if (PrintBiasedLockingStatistics) {
+              (* BiasedLocking::anonymously_biased_lock_entry_count_addr())++;
+            }
+          } else {
+            CALL_VM(InterpreterRuntime::monitorenter(THREAD, entry), handle_exception);
+          }
+          success = true;
+        }
+      }
+
+      // traditional lightweight locking
+      if (!success) {
+        markOop displaced = lockee->mark()->set_unlocked();
+        entry->lock()->set_displaced_header(displaced);
+        bool call_vm = UseHeavyMonitors;
+        if (call_vm || Atomic::cmpxchg_ptr(entry, lockee->mark_addr(), displaced) != displaced) {
+          // Is it simple recursive case?
+          if (!call_vm && THREAD->is_lock_owned((address) displaced->clear_lock_bits())) {
+            entry->lock()->set_displaced_header(NULL);
+          } else {
+            CALL_VM(InterpreterRuntime::monitorenter(THREAD, entry), handle_exception);
+          }
         }
       }
       UPDATE_PC_AND_TOS(1, -1);
@@ -1050,6 +1160,11 @@
           uint16_t reg = Bytes::get_Java_u2(pc + 2);
 
           opcode = pc[1];
+
+          // Wide and it's sub-bytecode are counted as separate instructions. If we
+          // don't account for this here, the bytecode trace skips the next bytecode.
+          DO_UPDATE_INSTRUCTION_COUNT(opcode);
+
           switch(opcode) {
               case Bytecodes::_aload:
                   VERIFY_OOP(LOCALS_OBJECT(reg));
@@ -1093,10 +1208,13 @@
                   UPDATE_PC_AND_CONTINUE(6);
               }
               case Bytecodes::_ret:
+                  // Profile ret.
+                  BI_PROFILE_UPDATE_RET(/*bci=*/((int)(intptr_t)(LOCALS_ADDR(reg))));
+                  // Now, update the pc.
                   pc = istate->method()->code_base() + (intptr_t)(LOCALS_ADDR(reg));
                   UPDATE_PC_AND_CONTINUE(0);
               default:
-                  VM_JAVA_ERROR(vmSymbols::java_lang_InternalError(), "undefined opcode");
+                  VM_JAVA_ERROR(vmSymbols::java_lang_InternalError(), "undefined opcode", note_no_trap);
           }
       }
 
@@ -1177,7 +1295,7 @@
       CASE(_i##opcname):                                                \
           if (test && (STACK_INT(-1) == 0)) {                           \
               VM_JAVA_ERROR(vmSymbols::java_lang_ArithmeticException(), \
-                            "/ by zero");                               \
+                            "/ by zero", note_div0Check_trap);          \
           }                                                             \
           SET_STACK_INT(VMint##opname(STACK_INT(-2),                    \
                                       STACK_INT(-1)),                   \
@@ -1189,7 +1307,7 @@
             jlong l1 = STACK_LONG(-1);                                  \
             if (VMlongEqz(l1)) {                                        \
               VM_JAVA_ERROR(vmSymbols::java_lang_ArithmeticException(), \
-                            "/ by long zero");                          \
+                            "/ by long zero", note_div0Check_trap);     \
             }                                                           \
           }                                                             \
           /* First long at (-1,-2) next long at (-3,-4) */              \
@@ -1402,17 +1520,23 @@
 
 #define COMPARISON_OP(name, comparison)                                      \
       CASE(_if_icmp##name): {                                                \
-          int skip = (STACK_INT(-2) comparison STACK_INT(-1))                \
+          const bool cmp = (STACK_INT(-2) comparison STACK_INT(-1));         \
+          int skip = cmp                                                     \
                       ? (int16_t)Bytes::get_Java_u2(pc + 1) : 3;             \
           address branch_pc = pc;                                            \
+          /* Profile branch. */                                              \
+          BI_PROFILE_UPDATE_BRANCH(/*is_taken=*/cmp);                        \
           UPDATE_PC_AND_TOS(skip, -2);                                       \
           DO_BACKEDGE_CHECKS(skip, branch_pc);                               \
           CONTINUE;                                                          \
       }                                                                      \
       CASE(_if##name): {                                                     \
-          int skip = (STACK_INT(-1) comparison 0)                            \
+          const bool cmp = (STACK_INT(-1) comparison 0);                     \
+          int skip = cmp                                                     \
                       ? (int16_t)Bytes::get_Java_u2(pc + 1) : 3;             \
           address branch_pc = pc;                                            \
+          /* Profile branch. */                                              \
+          BI_PROFILE_UPDATE_BRANCH(/*is_taken=*/cmp);                        \
           UPDATE_PC_AND_TOS(skip, -1);                                       \
           DO_BACKEDGE_CHECKS(skip, branch_pc);                               \
           CONTINUE;                                                          \
@@ -1421,9 +1545,12 @@
 #define COMPARISON_OP2(name, comparison)                                     \
       COMPARISON_OP(name, comparison)                                        \
       CASE(_if_acmp##name): {                                                \
-          int skip = (STACK_OBJECT(-2) comparison STACK_OBJECT(-1))          \
+          const bool cmp = (STACK_OBJECT(-2) comparison STACK_OBJECT(-1));   \
+          int skip = cmp                                                     \
                        ? (int16_t)Bytes::get_Java_u2(pc + 1) : 3;            \
           address branch_pc = pc;                                            \
+          /* Profile branch. */                                              \
+          BI_PROFILE_UPDATE_BRANCH(/*is_taken=*/cmp);                        \
           UPDATE_PC_AND_TOS(skip, -2);                                       \
           DO_BACKEDGE_CHECKS(skip, branch_pc);                               \
           CONTINUE;                                                          \
@@ -1431,9 +1558,12 @@
 
 #define NULL_COMPARISON_NOT_OP(name)                                         \
       CASE(_if##name): {                                                     \
-          int skip = (!(STACK_OBJECT(-1) == NULL))                           \
+          const bool cmp = (!(STACK_OBJECT(-1) == NULL));                    \
+          int skip = cmp                                                     \
                       ? (int16_t)Bytes::get_Java_u2(pc + 1) : 3;             \
           address branch_pc = pc;                                            \
+          /* Profile branch. */                                              \
+          BI_PROFILE_UPDATE_BRANCH(/*is_taken=*/cmp);                        \
           UPDATE_PC_AND_TOS(skip, -1);                                       \
           DO_BACKEDGE_CHECKS(skip, branch_pc);                               \
           CONTINUE;                                                          \
@@ -1441,9 +1571,12 @@
 
 #define NULL_COMPARISON_OP(name)                                             \
       CASE(_if##name): {                                                     \
-          int skip = ((STACK_OBJECT(-1) == NULL))                            \
+          const bool cmp = ((STACK_OBJECT(-1) == NULL));                     \
+          int skip = cmp                                                     \
                       ? (int16_t)Bytes::get_Java_u2(pc + 1) : 3;             \
           address branch_pc = pc;                                            \
+          /* Profile branch. */                                              \
+          BI_PROFILE_UPDATE_BRANCH(/*is_taken=*/cmp);                        \
           UPDATE_PC_AND_TOS(skip, -1);                                       \
           DO_BACKEDGE_CHECKS(skip, branch_pc);                               \
           CONTINUE;                                                          \
@@ -1466,30 +1599,42 @@
           int32_t  high = Bytes::get_Java_u4((address)&lpc[2]);
           int32_t  skip;
           key -= low;
-          skip = ((uint32_t) key > (uint32_t)(high - low))
-                      ? Bytes::get_Java_u4((address)&lpc[0])
-                      : Bytes::get_Java_u4((address)&lpc[key + 3]);
-          // Does this really need a full backedge check (osr?)
+          if (((uint32_t) key > (uint32_t)(high - low))) {
+            key = -1;
+            skip = Bytes::get_Java_u4((address)&lpc[0]);
+          } else {
+            skip = Bytes::get_Java_u4((address)&lpc[key + 3]);
+          }
+          // Profile switch.
+          BI_PROFILE_UPDATE_SWITCH(/*switch_index=*/key);
+          // Does this really need a full backedge check (osr)?
           address branch_pc = pc;
           UPDATE_PC_AND_TOS(skip, -1);
           DO_BACKEDGE_CHECKS(skip, branch_pc);
           CONTINUE;
       }
 
-      /* Goto pc whose table entry matches specified key */
+      /* Goto pc whose table entry matches specified key. */
 
       CASE(_lookupswitch): {
           jint* lpc  = (jint*)VMalignWordUp(pc+1);
           int32_t  key  = STACK_INT(-1);
           int32_t  skip = Bytes::get_Java_u4((address) lpc); /* default amount */
+          // Remember index.
+          int      index = -1;
+          int      newindex = 0;
           int32_t  npairs = Bytes::get_Java_u4((address) &lpc[1]);
           while (--npairs >= 0) {
-              lpc += 2;
-              if (key == (int32_t)Bytes::get_Java_u4((address)lpc)) {
-                  skip = Bytes::get_Java_u4((address)&lpc[1]);
-                  break;
-              }
+            lpc += 2;
+            if (key == (int32_t)Bytes::get_Java_u4((address)lpc)) {
+              skip = Bytes::get_Java_u4((address)&lpc[1]);
+              index = newindex;
+              break;
+            }
+            newindex += 1;
           }
+          // Profile switch.
+          BI_PROFILE_UPDATE_SWITCH(/*switch_index=*/index);
           address branch_pc = pc;
           UPDATE_PC_AND_TOS(skip, -1);
           DO_BACKEDGE_CHECKS(skip, branch_pc);
@@ -1574,7 +1719,7 @@
       if ((uint32_t)index >= (uint32_t)arrObj->length()) {                     \
           sprintf(message, "%d", index);                                       \
           VM_JAVA_ERROR(vmSymbols::java_lang_ArrayIndexOutOfBoundsException(), \
-                        message);                                              \
+                        message, note_rangeCheck_trap);                        \
       }
 
       /* 32-bit loads. These handle conversion from < 32-bit types */
@@ -1600,8 +1745,11 @@
           ARRAY_LOADTO32(T_INT, jint,   "%d",   STACK_INT, 0);
       CASE(_faload):
           ARRAY_LOADTO32(T_FLOAT, jfloat, "%f",   STACK_FLOAT, 0);
-      CASE(_aaload):
-          ARRAY_LOADTO32(T_OBJECT, oop,   INTPTR_FORMAT, STACK_OBJECT, 0);
+      CASE(_aaload): {
+          ARRAY_INTRO(-2);
+          SET_STACK_OBJECT(((objArrayOop) arrObj)->obj_at(index), -2);
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, -1);
+      }
       CASE(_baload):
           ARRAY_LOADTO32(T_BYTE, jbyte,  "%d",   STACK_INT, 0);
       CASE(_caload):
@@ -1645,21 +1793,24 @@
           // arrObj, index are set
           if (rhsObject != NULL) {
             /* Check assignability of rhsObject into arrObj */
-            Klass* rhsKlassOop = rhsObject->klass(); // EBX (subclass)
-            Klass* elemKlassOop = ObjArrayKlass::cast(arrObj->klass())->element_klass(); // superklass EAX
+            Klass* rhsKlass = rhsObject->klass(); // EBX (subclass)
+            Klass* elemKlass = ObjArrayKlass::cast(arrObj->klass())->element_klass(); // superklass EAX
             //
             // Check for compatibilty. This check must not GC!!
             // Seems way more expensive now that we must dispatch
             //
-            if (rhsKlassOop != elemKlassOop && !rhsKlassOop->is_subtype_of(elemKlassOop)) { // ebx->is...
-              VM_JAVA_ERROR(vmSymbols::java_lang_ArrayStoreException(), "");
+            if (rhsKlass != elemKlass && !rhsKlass->is_subtype_of(elemKlass)) { // ebx->is...
+              // Decrement counter if subtype check failed.
+              BI_PROFILE_SUBTYPECHECK_FAILED(rhsKlass);
+              VM_JAVA_ERROR(vmSymbols::java_lang_ArrayStoreException(), "", note_arrayCheck_trap);
             }
+            // Profile checkcast with null_seen and receiver.
+            BI_PROFILE_UPDATE_CHECKCAST(/*null_seen=*/false, rhsKlass);
+          } else {
+            // Profile checkcast with null_seen and receiver.
+            BI_PROFILE_UPDATE_CHECKCAST(/*null_seen=*/true, NULL);
           }
-          oop* elem_loc = (oop*)(((address) arrObj->base(T_OBJECT)) + index * sizeof(oop));
-          // *(oop*)(((address) arrObj->base(T_OBJECT)) + index * sizeof(oop)) = rhsObject;
-          *elem_loc = rhsObject;
-          // Mark the card
-          OrderAccess::release_store(&BYTE_MAP_BASE[(uintptr_t)elem_loc >> CardTableModRefBS::card_shift], 0);
+          ((objArrayOop) arrObj)->obj_at_put(index, rhsObject);
           UPDATE_PC_AND_TOS_AND_CONTINUE(1, -3);
       }
       CASE(_bastore):
@@ -1700,14 +1851,87 @@
         }
         if (entry != NULL) {
           entry->set_obj(lockee);
-          markOop displaced = lockee->mark()->set_unlocked();
-          entry->lock()->set_displaced_header(displaced);
-          if (Atomic::cmpxchg_ptr(entry, lockee->mark_addr(), displaced) != displaced) {
-            // Is it simple recursive case?
-            if (THREAD->is_lock_owned((address) displaced->clear_lock_bits())) {
-              entry->lock()->set_displaced_header(NULL);
-            } else {
-              CALL_VM(InterpreterRuntime::monitorenter(THREAD, entry), handle_exception);
+          int success = false;
+          uintptr_t epoch_mask_in_place = (uintptr_t)markOopDesc::epoch_mask_in_place;
+
+          markOop mark = lockee->mark();
+          intptr_t hash = (intptr_t) markOopDesc::no_hash;
+          // implies UseBiasedLocking
+          if (mark->has_bias_pattern()) {
+            uintptr_t thread_ident;
+            uintptr_t anticipated_bias_locking_value;
+            thread_ident = (uintptr_t)istate->thread();
+            anticipated_bias_locking_value =
+              (((uintptr_t)lockee->klass()->prototype_header() | thread_ident) ^ (uintptr_t)mark) &
+              ~((uintptr_t) markOopDesc::age_mask_in_place);
+
+            if  (anticipated_bias_locking_value == 0) {
+              // already biased towards this thread, nothing to do
+              if (PrintBiasedLockingStatistics) {
+                (* BiasedLocking::biased_lock_entry_count_addr())++;
+              }
+              success = true;
+            }
+            else if ((anticipated_bias_locking_value & markOopDesc::biased_lock_mask_in_place) != 0) {
+              // try revoke bias
+              markOop header = lockee->klass()->prototype_header();
+              if (hash != markOopDesc::no_hash) {
+                header = header->copy_set_hash(hash);
+              }
+              if (Atomic::cmpxchg_ptr(header, lockee->mark_addr(), mark) == mark) {
+                if (PrintBiasedLockingStatistics)
+                  (*BiasedLocking::revoked_lock_entry_count_addr())++;
+              }
+            }
+            else if ((anticipated_bias_locking_value & epoch_mask_in_place) !=0) {
+              // try rebias
+              markOop new_header = (markOop) ( (intptr_t) lockee->klass()->prototype_header() | thread_ident);
+              if (hash != markOopDesc::no_hash) {
+                new_header = new_header->copy_set_hash(hash);
+              }
+              if (Atomic::cmpxchg_ptr((void*)new_header, lockee->mark_addr(), mark) == mark) {
+                if (PrintBiasedLockingStatistics)
+                  (* BiasedLocking::rebiased_lock_entry_count_addr())++;
+              }
+              else {
+                CALL_VM(InterpreterRuntime::monitorenter(THREAD, entry), handle_exception);
+              }
+              success = true;
+            }
+            else {
+              // try to bias towards thread in case object is anonymously biased
+              markOop header = (markOop) ((uintptr_t) mark & ((uintptr_t)markOopDesc::biased_lock_mask_in_place |
+                                                              (uintptr_t)markOopDesc::age_mask_in_place |
+                                                              epoch_mask_in_place));
+              if (hash != markOopDesc::no_hash) {
+                header = header->copy_set_hash(hash);
+              }
+              markOop new_header = (markOop) ((uintptr_t) header | thread_ident);
+              // debugging hint
+              DEBUG_ONLY(entry->lock()->set_displaced_header((markOop) (uintptr_t) 0xdeaddead);)
+              if (Atomic::cmpxchg_ptr((void*)new_header, lockee->mark_addr(), header) == header) {
+                if (PrintBiasedLockingStatistics)
+                  (* BiasedLocking::anonymously_biased_lock_entry_count_addr())++;
+              }
+              else {
+                CALL_VM(InterpreterRuntime::monitorenter(THREAD, entry), handle_exception);
+              }
+              success = true;
+            }
+          }
+
+          // traditional lightweight locking
+          if (!success) {
+            markOop displaced = lockee->mark()->set_unlocked();
+            entry->lock()->set_displaced_header(displaced);
+            bool call_vm = UseHeavyMonitors;
+            if (call_vm || Atomic::cmpxchg_ptr(entry, lockee->mark_addr(), displaced) != displaced) {
+              // Is it simple recursive case?
+              if (!call_vm && THREAD->is_lock_owned((address) displaced->clear_lock_bits())) {
+                entry->lock()->set_displaced_header(NULL);
+              } else {
+                CALL_VM(InterpreterRuntime::monitorenter(THREAD, entry), handle_exception);
+              }
             }
           }
           UPDATE_PC_AND_TOS_AND_CONTINUE(1, -1);
@@ -1729,12 +1953,15 @@
             BasicLock* lock = most_recent->lock();
             markOop header = lock->displaced_header();
             most_recent->set_obj(NULL);
-            // If it isn't recursive we either must swap old header or call the runtime
-            if (header != NULL) {
-              if (Atomic::cmpxchg_ptr(header, lockee->mark_addr(), lock) != lock) {
-                // restore object for the slow case
-                most_recent->set_obj(lockee);
-                CALL_VM(InterpreterRuntime::monitorexit(THREAD, most_recent), handle_exception);
+            if (!lockee->mark()->has_bias_pattern()) {
+              bool call_vm = UseHeavyMonitors;
+              // If it isn't recursive we either must swap old header or call the runtime
+              if (header != NULL || call_vm) {
+                if (call_vm || Atomic::cmpxchg_ptr(header, lockee->mark_addr(), lock) != lock) {
+                  // restore object for the slow case
+                  most_recent->set_obj(lockee);
+                  CALL_VM(InterpreterRuntime::monitorexit(THREAD, most_recent), handle_exception);
+                }
               }
             }
             UPDATE_PC_AND_TOS_AND_CONTINUE(1, -1);
@@ -1807,6 +2034,9 @@
           TosState tos_type = cache->flag_state();
           int field_offset = cache->f2_as_index();
           if (cache->is_volatile()) {
+            if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+              OrderAccess::fence();
+            }
             if (tos_type == atos) {
               VERIFY_OOP(obj->obj_field_acquire(field_offset));
               SET_STACK_OBJECT(obj->obj_field_acquire(field_offset), -1);
@@ -1923,7 +2153,6 @@
             } else if (tos_type == atos) {
               VERIFY_OOP(STACK_OBJECT(-1));
               obj->release_obj_field_put(field_offset, STACK_OBJECT(-1));
-              OrderAccess::release_store(&BYTE_MAP_BASE[(uintptr_t)obj >> CardTableModRefBS::card_shift], 0);
             } else if (tos_type == btos) {
               obj->release_byte_field_put(field_offset, STACK_INT(-1));
             } else if (tos_type == ltos) {
@@ -1944,7 +2173,6 @@
             } else if (tos_type == atos) {
               VERIFY_OOP(STACK_OBJECT(-1));
               obj->obj_field_put(field_offset, STACK_OBJECT(-1));
-              OrderAccess::release_store(&BYTE_MAP_BASE[(uintptr_t)obj >> CardTableModRefBS::card_shift], 0);
             } else if (tos_type == btos) {
               obj->byte_field_put(field_offset, STACK_INT(-1));
             } else if (tos_type == ltos) {
@@ -1981,10 +2209,14 @@
             if (UseTLAB) {
               result = (oop) THREAD->tlab().allocate(obj_size);
             }
+            // Disable non-TLAB-based fast-path, because profiling requires that all
+            // allocations go through InterpreterRuntime::_new() if THREAD->tlab().allocate
+            // returns NULL.
+#ifndef CC_INTERP_PROFILE
             if (result == NULL) {
               need_zero = true;
               // Try allocate in shared eden
-        retry:
+            retry:
               HeapWord* compare_to = *Universe::heap()->top_addr();
               HeapWord* new_top = compare_to + obj_size;
               if (new_top <= *Universe::heap()->end_addr()) {
@@ -1994,6 +2226,7 @@
                 result = (oop) compare_to;
               }
             }
+#endif
             if (result != NULL) {
               // Initialize object (if nonzero size and need) and then the header
               if (need_zero ) {
@@ -2010,6 +2243,9 @@
               }
               result->set_klass_gap(0);
               result->set_klass(k_entry);
+              // Must prevent reordering of stores for object initialization
+              // with stores that publish the new object.
+              OrderAccess::storestore();
               SET_STACK_OBJECT(result, 0);
               UPDATE_PC_AND_TOS_AND_CONTINUE(3, 1);
             }
@@ -2018,6 +2254,9 @@
         // Slow case allocation
         CALL_VM(InterpreterRuntime::_new(THREAD, METHOD->constants(), index),
                 handle_exception);
+        // Must prevent reordering of stores for object initialization
+        // with stores that publish the new object.
+        OrderAccess::storestore();
         SET_STACK_OBJECT(THREAD->vm_result(), 0);
         THREAD->set_vm_result(NULL);
         UPDATE_PC_AND_TOS_AND_CONTINUE(3, 1);
@@ -2027,6 +2266,9 @@
         jint size = STACK_INT(-1);
         CALL_VM(InterpreterRuntime::anewarray(THREAD, METHOD->constants(), index, size),
                 handle_exception);
+        // Must prevent reordering of stores for object initialization
+        // with stores that publish the new object.
+        OrderAccess::storestore();
         SET_STACK_OBJECT(THREAD->vm_result(), -1);
         THREAD->set_vm_result(NULL);
         UPDATE_PC_AND_CONTINUE(3);
@@ -2041,6 +2283,9 @@
         //adjust pointer to start of stack element
         CALL_VM(InterpreterRuntime::multianewarray(THREAD, dimarray),
                 handle_exception);
+        // Must prevent reordering of stores for object initialization
+        // with stores that publish the new object.
+        OrderAccess::storestore();
         SET_STACK_OBJECT(THREAD->vm_result(), -dims);
         THREAD->set_vm_result(NULL);
         UPDATE_PC_AND_TOS_AND_CONTINUE(4, -(dims-1));
@@ -2049,61 +2294,63 @@
           if (STACK_OBJECT(-1) != NULL) {
             VERIFY_OOP(STACK_OBJECT(-1));
             u2 index = Bytes::get_Java_u2(pc+1);
-            if (ProfileInterpreter) {
-              // needs Profile_checkcast QQQ
-              ShouldNotReachHere();
-            }
             // Constant pool may have actual klass or unresolved klass. If it is
-            // unresolved we must resolve it
+            // unresolved we must resolve it.
             if (METHOD->constants()->tag_at(index).is_unresolved_klass()) {
               CALL_VM(InterpreterRuntime::quicken_io_cc(THREAD), handle_exception);
             }
             Klass* klassOf = (Klass*) METHOD->constants()->slot_at(index).get_klass();
-            Klass* objKlassOop = STACK_OBJECT(-1)->klass(); //ebx
+            Klass* objKlass = STACK_OBJECT(-1)->klass(); // ebx
             //
             // Check for compatibilty. This check must not GC!!
-            // Seems way more expensive now that we must dispatch
+            // Seems way more expensive now that we must dispatch.
             //
-            if (objKlassOop != klassOf &&
-                !objKlassOop->is_subtype_of(klassOf)) {
+            if (objKlass != klassOf && !objKlass->is_subtype_of(klassOf)) {
+              // Decrement counter at checkcast.
+              BI_PROFILE_SUBTYPECHECK_FAILED(objKlass);
               ResourceMark rm(THREAD);
-              const char* objName = objKlassOop->external_name();
+              const char* objName = objKlass->external_name();
               const char* klassName = klassOf->external_name();
               char* message = SharedRuntime::generate_class_cast_message(
                 objName, klassName);
-              VM_JAVA_ERROR(vmSymbols::java_lang_ClassCastException(), message);
+              VM_JAVA_ERROR(vmSymbols::java_lang_ClassCastException(), message, note_classCheck_trap);
             }
+            // Profile checkcast with null_seen and receiver.
+            BI_PROFILE_UPDATE_CHECKCAST(/*null_seen=*/false, objKlass);
           } else {
-            if (UncommonNullCast) {
-//              istate->method()->set_null_cast_seen();
-// [RGV] Not sure what to do here!
-
-            }
+            // Profile checkcast with null_seen and receiver.
+            BI_PROFILE_UPDATE_CHECKCAST(/*null_seen=*/true, NULL);
           }
           UPDATE_PC_AND_CONTINUE(3);
 
       CASE(_instanceof):
           if (STACK_OBJECT(-1) == NULL) {
             SET_STACK_INT(0, -1);
+            // Profile instanceof with null_seen and receiver.
+            BI_PROFILE_UPDATE_INSTANCEOF(/*null_seen=*/true, NULL);
           } else {
             VERIFY_OOP(STACK_OBJECT(-1));
             u2 index = Bytes::get_Java_u2(pc+1);
             // Constant pool may have actual klass or unresolved klass. If it is
-            // unresolved we must resolve it
+            // unresolved we must resolve it.
             if (METHOD->constants()->tag_at(index).is_unresolved_klass()) {
               CALL_VM(InterpreterRuntime::quicken_io_cc(THREAD), handle_exception);
             }
             Klass* klassOf = (Klass*) METHOD->constants()->slot_at(index).get_klass();
-            Klass* objKlassOop = STACK_OBJECT(-1)->klass();
+            Klass* objKlass = STACK_OBJECT(-1)->klass();
             //
             // Check for compatibilty. This check must not GC!!
-            // Seems way more expensive now that we must dispatch
+            // Seems way more expensive now that we must dispatch.
             //
-            if ( objKlassOop == klassOf || objKlassOop->is_subtype_of(klassOf)) {
+            if ( objKlass == klassOf || objKlass->is_subtype_of(klassOf)) {
               SET_STACK_INT(1, -1);
             } else {
               SET_STACK_INT(0, -1);
+              // Decrement counter at checkcast.
+              BI_PROFILE_SUBTYPECHECK_FAILED(objKlass);
             }
+            // Profile instanceof with null_seen and receiver.
+            BI_PROFILE_UPDATE_INSTANCEOF(/*null_seen=*/false, objKlass);
           }
           UPDATE_PC_AND_CONTINUE(3);
 
@@ -2246,6 +2493,9 @@
         istate->set_callee_entry_point(method->from_interpreted_entry());
         istate->set_bcp_advance(5);
 
+        // Invokedynamic has got a call counter, just like an invokestatic -> increment!
+        BI_PROFILE_UPDATE_CALL();
+
         UPDATE_PC_AND_RETURN(0); // I'll be back...
       }
 
@@ -2278,6 +2528,9 @@
         istate->set_callee_entry_point(method->from_interpreted_entry());
         istate->set_bcp_advance(3);
 
+        // Invokehandle has got a call counter, just like a final call -> increment!
+        BI_PROFILE_UPDATE_FINALCALL();
+
         UPDATE_PC_AND_RETURN(0); // I'll be back...
       }
 
@@ -2305,14 +2558,18 @@
           CHECK_NULL(STACK_OBJECT(-(cache->parameter_size())));
           if (cache->is_vfinal()) {
             callee = cache->f2_as_vfinal_method();
+            // Profile 'special case of invokeinterface' final call.
+            BI_PROFILE_UPDATE_FINALCALL();
           } else {
-            // get receiver
+            // Get receiver.
             int parms = cache->parameter_size();
-            // Same comments as invokevirtual apply here
-            VERIFY_OOP(STACK_OBJECT(-parms));
-            InstanceKlass* rcvrKlass = (InstanceKlass*)
-                                 STACK_OBJECT(-parms)->klass();
+            // Same comments as invokevirtual apply here.
+            oop rcvr = STACK_OBJECT(-parms);
+            VERIFY_OOP(rcvr);
+            InstanceKlass* rcvrKlass = (InstanceKlass*)rcvr->klass();
             callee = (Method*) rcvrKlass->start_of_vtable()[ cache->f2_as_index()];
+            // Profile 'special case of invokeinterface' virtual call.
+            BI_PROFILE_UPDATE_VIRTUALCALL(rcvr->klass());
           }
           istate->set_callee(callee);
           istate->set_callee_entry_point(callee->from_interpreted_entry());
@@ -2343,15 +2600,18 @@
         // interface.  The link resolver checks this but only for the first
         // time this interface is called.
         if (i == int2->itable_length()) {
-          VM_JAVA_ERROR(vmSymbols::java_lang_IncompatibleClassChangeError(), "");
+          VM_JAVA_ERROR(vmSymbols::java_lang_IncompatibleClassChangeError(), "", note_no_trap);
         }
         int mindex = cache->f2_as_index();
         itableMethodEntry* im = ki->first_method_entry(rcvr->klass());
         callee = im[mindex].method();
         if (callee == NULL) {
-          VM_JAVA_ERROR(vmSymbols::java_lang_AbstractMethodError(), "");
+          VM_JAVA_ERROR(vmSymbols::java_lang_AbstractMethodError(), "", note_no_trap);
         }
 
+        // Profile virtual call.
+        BI_PROFILE_UPDATE_VIRTUALCALL(rcvr->klass());
+
         istate->set_callee(callee);
         istate->set_callee_entry_point(callee->from_interpreted_entry());
 #ifdef VM_JVMTI
@@ -2383,8 +2643,11 @@
           Method* callee;
           if ((Bytecodes::Code)opcode == Bytecodes::_invokevirtual) {
             CHECK_NULL(STACK_OBJECT(-(cache->parameter_size())));
-            if (cache->is_vfinal()) callee = cache->f2_as_vfinal_method();
-            else {
+            if (cache->is_vfinal()) {
+              callee = cache->f2_as_vfinal_method();
+              // Profile final call.
+              BI_PROFILE_UPDATE_FINALCALL();
+            } else {
               // get receiver
               int parms = cache->parameter_size();
               // this works but needs a resourcemark and seems to create a vtable on every call:
@@ -2393,8 +2656,9 @@
               // this fails with an assert
               // InstanceKlass* rcvrKlass = InstanceKlass::cast(STACK_OBJECT(-parms)->klass());
               // but this works
-              VERIFY_OOP(STACK_OBJECT(-parms));
-              InstanceKlass* rcvrKlass = (InstanceKlass*) STACK_OBJECT(-parms)->klass();
+              oop rcvr = STACK_OBJECT(-parms);
+              VERIFY_OOP(rcvr);
+              InstanceKlass* rcvrKlass = (InstanceKlass*)rcvr->klass();
               /*
                 Executing this code in java.lang.String:
                     public String(char value[]) {
@@ -2412,12 +2676,17 @@
 
               */
               callee = (Method*) rcvrKlass->start_of_vtable()[ cache->f2_as_index()];
+              // Profile virtual call.
+              BI_PROFILE_UPDATE_VIRTUALCALL(rcvr->klass());
             }
           } else {
             if ((Bytecodes::Code)opcode == Bytecodes::_invokespecial) {
               CHECK_NULL(STACK_OBJECT(-(cache->parameter_size())));
             }
             callee = cache->f1_as_method();
+
+            // Profile call.
+            BI_PROFILE_UPDATE_CALL();
           }
 
           istate->set_callee(callee);
@@ -2439,6 +2708,9 @@
         jint size = STACK_INT(-1);
         CALL_VM(InterpreterRuntime::newarray(THREAD, atype, size),
                 handle_exception);
+        // Must prevent reordering of stores for object initialization
+        // with stores that publish the new object.
+        OrderAccess::storestore();
         SET_STACK_OBJECT(THREAD->vm_result(), -1);
         THREAD->set_vm_result(NULL);
 
@@ -2469,6 +2741,8 @@
       CASE(_goto):
       {
           int16_t offset = (int16_t)Bytes::get_Java_u2(pc + 1);
+          // Profile jump.
+          BI_PROFILE_UPDATE_JUMP();
           address branch_pc = pc;
           UPDATE_PC(offset);
           DO_BACKEDGE_CHECKS(offset, branch_pc);
@@ -2485,6 +2759,8 @@
       CASE(_goto_w):
       {
           int32_t offset = Bytes::get_Java_u4(pc + 1);
+          // Profile jump.
+          BI_PROFILE_UPDATE_JUMP();
           address branch_pc = pc;
           UPDATE_PC(offset);
           DO_BACKEDGE_CHECKS(offset, branch_pc);
@@ -2494,6 +2770,9 @@
       /* return from a jsr or jsr_w */
 
       CASE(_ret): {
+          // Profile ret.
+          BI_PROFILE_UPDATE_RET(/*bci=*/((int)(intptr_t)(LOCALS_ADDR(pc[1]))));
+          // Now, update the pc.
           pc = istate->method()->code_base() + (intptr_t)(LOCALS_ADDR(pc[1]));
           UPDATE_PC_AND_CONTINUE(0);
       }
@@ -2567,23 +2846,26 @@
       if (TraceExceptions) {
         ttyLocker ttyl;
         ResourceMark rm;
-        tty->print_cr("Exception <%s> (" INTPTR_FORMAT ")", except_oop->print_value_string(), except_oop());
+        tty->print_cr("Exception <%s> (" INTPTR_FORMAT ")", except_oop->print_value_string(), (void*)except_oop());
         tty->print_cr(" thrown in interpreter method <%s>", METHOD->print_value_string());
         tty->print_cr(" at bci %d, continuing at %d for thread " INTPTR_FORMAT,
-                      pc - (intptr_t)METHOD->code_base(),
+                      istate->bcp() - (intptr_t)METHOD->code_base(),
                       continuation_bci, THREAD);
       }
       // for AbortVMOnException flag
       NOT_PRODUCT(Exceptions::debug_check_abort(except_oop));
+
+      // Update profiling data.
+      BI_PROFILE_ALIGN_TO_CURRENT_BCI();
       goto run;
     }
     if (TraceExceptions) {
       ttyLocker ttyl;
       ResourceMark rm;
-      tty->print_cr("Exception <%s> (" INTPTR_FORMAT ")", except_oop->print_value_string(), except_oop());
+      tty->print_cr("Exception <%s> (" INTPTR_FORMAT ")", except_oop->print_value_string(), (void*)except_oop());
       tty->print_cr(" thrown in interpreter method <%s>", METHOD->print_value_string());
       tty->print_cr(" at bci %d, unwinding for thread " INTPTR_FORMAT,
-                    pc  - (intptr_t) METHOD->code_base(),
+                    istate->bcp() - (intptr_t)METHOD->code_base(),
                     THREAD);
     }
     // for AbortVMOnException flag
@@ -2591,32 +2873,87 @@
     // No handler in this activation, unwind and try again
     THREAD->set_pending_exception(except_oop(), NULL, 0);
     goto handle_return;
-  }  /* handle_exception: */
-
-
+  }  // handle_exception:
 
   // Return from an interpreter invocation with the result of the interpretation
   // on the top of the Java Stack (or a pending exception)
 
-handle_Pop_Frame:
-
-  // We don't really do anything special here except we must be aware
-  // that we can get here without ever locking the method (if sync).
-  // Also we skip the notification of the exit.
-
-  istate->set_msg(popping_frame);
-  // Clear pending so while the pop is in process
-  // we don't start another one if a call_vm is done.
-  THREAD->clr_pop_frame_pending();
-  // Let interpreter (only) see the we're in the process of popping a frame
-  THREAD->set_pop_frame_in_process();
-
-handle_return:
-  {
+  handle_Pop_Frame: {
+
+    // We don't really do anything special here except we must be aware
+    // that we can get here without ever locking the method (if sync).
+    // Also we skip the notification of the exit.
+
+    istate->set_msg(popping_frame);
+    // Clear pending so while the pop is in process
+    // we don't start another one if a call_vm is done.
+    THREAD->clr_pop_frame_pending();
+    // Let interpreter (only) see the we're in the process of popping a frame
+    THREAD->set_pop_frame_in_process();
+
+    goto handle_return;
+
+  } // handle_Pop_Frame
+
+  // ForceEarlyReturn ends a method, and returns to the caller with a return value
+  // given by the invoker of the early return.
+  handle_Early_Return: {
+
+    istate->set_msg(early_return);
+
+    // Clear expression stack.
+    topOfStack = istate->stack_base() - Interpreter::stackElementWords;
+
+    JvmtiThreadState *ts = THREAD->jvmti_thread_state();
+
+    // Push the value to be returned.
+    switch (istate->method()->result_type()) {
+      case T_BOOLEAN:
+      case T_SHORT:
+      case T_BYTE:
+      case T_CHAR:
+      case T_INT:
+        SET_STACK_INT(ts->earlyret_value().i, 0);
+        MORE_STACK(1);
+        break;
+      case T_LONG:
+        SET_STACK_LONG(ts->earlyret_value().j, 1);
+        MORE_STACK(2);
+        break;
+      case T_FLOAT:
+        SET_STACK_FLOAT(ts->earlyret_value().f, 0);
+        MORE_STACK(1);
+        break;
+      case T_DOUBLE:
+        SET_STACK_DOUBLE(ts->earlyret_value().d, 1);
+        MORE_STACK(2);
+        break;
+      case T_ARRAY:
+      case T_OBJECT:
+        SET_STACK_OBJECT(ts->earlyret_oop(), 0);
+        MORE_STACK(1);
+        break;
+    }
+
+    ts->clr_earlyret_value();
+    ts->set_earlyret_oop(NULL);
+    ts->clr_earlyret_pending();
+
+    // Fall through to handle_return.
+
+  } // handle_Early_Return
+
+  handle_return: {
+    // A storestore barrier is required to order initialization of
+    // final fields with publishing the reference to the object that
+    // holds the field. Without the barrier the value of final fields
+    // can be observed to change.
+    OrderAccess::storestore();
+
     DECACHE_STATE();
 
-    bool suppress_error = istate->msg() == popping_frame;
-    bool suppress_exit_event = THREAD->has_pending_exception() || suppress_error;
+    bool suppress_error = istate->msg() == popping_frame || istate->msg() == early_return;
+    bool suppress_exit_event = THREAD->has_pending_exception() || istate->msg() == popping_frame;
     Handle original_exception(THREAD, THREAD->pending_exception());
     Handle illegal_state_oop(THREAD, NULL);
 
@@ -2677,15 +3014,18 @@
           BasicLock* lock = end->lock();
           markOop header = lock->displaced_header();
           end->set_obj(NULL);
-          // If it isn't recursive we either must swap old header or call the runtime
-          if (header != NULL) {
-            if (Atomic::cmpxchg_ptr(header, lockee->mark_addr(), lock) != lock) {
-              // restore object for the slow case
-              end->set_obj(lockee);
-              {
-                // Prevent any HandleMarkCleaner from freeing our live handles
-                HandleMark __hm(THREAD);
-                CALL_VM_NOCHECK(InterpreterRuntime::monitorexit(THREAD, end));
+
+          if (!lockee->mark()->has_bias_pattern()) {
+            // If it isn't recursive we either must swap old header or call the runtime
+            if (header != NULL) {
+              if (Atomic::cmpxchg_ptr(header, lockee->mark_addr(), lock) != lock) {
+                // restore object for the slow case
+                end->set_obj(lockee);
+                {
+                  // Prevent any HandleMarkCleaner from freeing our live handles
+                  HandleMark __hm(THREAD);
+                  CALL_VM_NOCHECK(InterpreterRuntime::monitorexit(THREAD, end));
+                }
               }
             }
           }
@@ -2730,27 +3070,41 @@
           oop rcvr = base->obj();
           if (rcvr == NULL) {
             if (!suppress_error) {
-              VM_JAVA_ERROR_NO_JUMP(vmSymbols::java_lang_NullPointerException(), "");
+              VM_JAVA_ERROR_NO_JUMP(vmSymbols::java_lang_NullPointerException(), "", note_nullCheck_trap);
               illegal_state_oop = THREAD->pending_exception();
               THREAD->clear_pending_exception();
             }
+          } else if (UseHeavyMonitors) {
+            {
+              // Prevent any HandleMarkCleaner from freeing our live handles.
+              HandleMark __hm(THREAD);
+              CALL_VM_NOCHECK(InterpreterRuntime::monitorexit(THREAD, base));
+            }
+            if (THREAD->has_pending_exception()) {
+              if (!suppress_error) illegal_state_oop = THREAD->pending_exception();
+              THREAD->clear_pending_exception();
+            }
           } else {
             BasicLock* lock = base->lock();
             markOop header = lock->displaced_header();
             base->set_obj(NULL);
-            // If it isn't recursive we either must swap old header or call the runtime
-            if (header != NULL) {
-              if (Atomic::cmpxchg_ptr(header, rcvr->mark_addr(), lock) != lock) {
-                // restore object for the slow case
-                base->set_obj(rcvr);
-                {
-                  // Prevent any HandleMarkCleaner from freeing our live handles
-                  HandleMark __hm(THREAD);
-                  CALL_VM_NOCHECK(InterpreterRuntime::monitorexit(THREAD, base));
-                }
-                if (THREAD->has_pending_exception()) {
-                  if (!suppress_error) illegal_state_oop = THREAD->pending_exception();
-                  THREAD->clear_pending_exception();
+
+            if (!rcvr->mark()->has_bias_pattern()) {
+              base->set_obj(NULL);
+              // If it isn't recursive we either must swap old header or call the runtime
+              if (header != NULL) {
+                if (Atomic::cmpxchg_ptr(header, rcvr->mark_addr(), lock) != lock) {
+                  // restore object for the slow case
+                  base->set_obj(rcvr);
+                  {
+                    // Prevent any HandleMarkCleaner from freeing our live handles
+                    HandleMark __hm(THREAD);
+                    CALL_VM_NOCHECK(InterpreterRuntime::monitorexit(THREAD, base));
+                  }
+                  if (THREAD->has_pending_exception()) {
+                    if (!suppress_error) illegal_state_oop = THREAD->pending_exception();
+                    THREAD->clear_pending_exception();
+                  }
                 }
               }
             }
@@ -2758,6 +3112,8 @@
         }
       }
     }
+    // Clear the do_not_unlock flag now.
+    THREAD->clr_do_not_unlock();
 
     //
     // Notify jvmti/jvmdi
@@ -2802,15 +3158,14 @@
     // A pending exception that was pending prior to a possible popping frame
     // overrides the popping frame.
     //
-    assert(!suppress_error || suppress_error && illegal_state_oop() == NULL, "Error was not suppressed");
+    assert(!suppress_error || (suppress_error && illegal_state_oop() == NULL), "Error was not suppressed");
     if (illegal_state_oop() != NULL || original_exception() != NULL) {
-      // inform the frame manager we have no result
+      // Inform the frame manager we have no result.
       istate->set_msg(throwing_exception);
       if (illegal_state_oop() != NULL)
         THREAD->set_pending_exception(illegal_state_oop(), NULL, 0);
       else
         THREAD->set_pending_exception(original_exception(), NULL, 0);
-      istate->set_return_kind((Bytecodes::Code)opcode);
       UPDATE_PC_AND_RETURN(0);
     }
 
@@ -2829,13 +3184,12 @@
                                 LOCALS_SLOT(METHOD->size_of_parameters() - 1));
         THREAD->set_popframe_condition_bit(JavaThread::popframe_force_deopt_reexecution_bit);
       }
-      THREAD->clr_pop_frame_in_process();
+    } else {
+      istate->set_msg(return_from_method);
     }
 
     // Normal return
     // Advance the pc and return to frame manager
-    istate->set_msg(return_from_method);
-    istate->set_return_kind((Bytecodes::Code)opcode);
     UPDATE_PC_AND_RETURN(1);
   } /* handle_return: */
 
@@ -2883,7 +3237,7 @@
 }
 
 oop BytecodeInterpreter::stack_object(intptr_t *tos, int offset) {
-  return (oop)tos [Interpreter::expr_index_at(-offset)];
+  return cast_to_oop(tos [Interpreter::expr_index_at(-offset)]);
 }
 
 jdouble BytecodeInterpreter::stack_double(intptr_t *tos, int offset) {
@@ -2952,7 +3306,7 @@
   return (jfloat)locals[Interpreter::local_index_at(-offset)];
 }
 oop BytecodeInterpreter::locals_object(intptr_t* locals, int offset) {
-  return (oop)locals[Interpreter::local_index_at(-offset)];
+  return cast_to_oop(locals[Interpreter::local_index_at(-offset)]);
 }
 jdouble BytecodeInterpreter::locals_double(intptr_t* locals, int offset) {
   return ((VMJavaVal64*)&locals[Interpreter::local_index_at(-(offset+1))])->d;
@@ -3110,9 +3464,8 @@
   tty->print_cr("result_to_call._bcp_advance: %d ", this->_result._to_call._bcp_advance);
   tty->print_cr("osr._osr_buf: " INTPTR_FORMAT, (uintptr_t) this->_result._osr._osr_buf);
   tty->print_cr("osr._osr_entry: " INTPTR_FORMAT, (uintptr_t) this->_result._osr._osr_entry);
-  tty->print_cr("result_return_kind 0x%x ", (int) this->_result._return_kind);
   tty->print_cr("prev_link: " INTPTR_FORMAT, (uintptr_t) this->_prev_link);
-  tty->print_cr("native_mirror: " INTPTR_FORMAT, (uintptr_t) this->_oop_temp);
+  tty->print_cr("native_mirror: " INTPTR_FORMAT, (void*) this->_oop_temp);
   tty->print_cr("stack_base: " INTPTR_FORMAT, (uintptr_t) this->_stack_base);
   tty->print_cr("stack_limit: " INTPTR_FORMAT, (uintptr_t) this->_stack_limit);
   tty->print_cr("monitor_base: " INTPTR_FORMAT, (uintptr_t) this->_monitor_base);
@@ -3129,9 +3482,9 @@
 }
 
 extern "C" {
-    void PI(uintptr_t arg) {
-        ((BytecodeInterpreter*)arg)->print();
-    }
+  void PI(uintptr_t arg) {
+    ((BytecodeInterpreter*)arg)->print();
+  }
 }
 #endif // PRODUCT