8068945: Use RBP register as proper frame pointer in JIT compiled code on x86
authorzmajo
Mon, 27 Apr 2015 10:49:43 +0200
changeset 30305 b92a97e1e9cb
parent 30302 ecca632210ef
child 30306 83fe12c6f39b
8068945: Use RBP register as proper frame pointer in JIT compiled code on x86 Summary: Introduce the PreserveFramePointer flag to control if RBP is used as the frame pointer or as a general purpose register. Reviewed-by: kvn, roland, dlong, enevill, shade
hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86Frame.java
hotspot/src/cpu/aarch64/vm/globals_aarch64.hpp
hotspot/src/cpu/ppc/vm/globals_ppc.hpp
hotspot/src/cpu/sparc/vm/globals_sparc.hpp
hotspot/src/cpu/x86/vm/assembler_x86.hpp
hotspot/src/cpu/x86/vm/c1_FrameMap_x86.cpp
hotspot/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp
hotspot/src/cpu/x86/vm/c1_Runtime1_x86.cpp
hotspot/src/cpu/x86/vm/frame_x86.cpp
hotspot/src/cpu/x86/vm/frame_x86.hpp
hotspot/src/cpu/x86/vm/frame_x86.inline.hpp
hotspot/src/cpu/x86/vm/globals_x86.hpp
hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp
hotspot/src/cpu/x86/vm/methodHandles_x86.cpp
hotspot/src/cpu/x86/vm/runtime_x86_32.cpp
hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp
hotspot/src/cpu/x86/vm/x86.ad
hotspot/src/cpu/x86/vm/x86_32.ad
hotspot/src/cpu/x86/vm/x86_64.ad
hotspot/src/share/vm/c1/c1_GraphBuilder.cpp
hotspot/src/share/vm/c1/c1_LIR.cpp
hotspot/src/share/vm/c1/c1_LIR.hpp
hotspot/src/share/vm/c1/c1_LIRGenerator.cpp
hotspot/src/share/vm/opto/bytecodeInfo.cpp
hotspot/src/share/vm/prims/forte.cpp
hotspot/src/share/vm/runtime/globals.hpp
hotspot/src/share/vm/runtime/sharedRuntime.cpp
hotspot/src/share/vm/runtime/vframe.hpp
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86Frame.java	Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86Frame.java	Mon Apr 27 10:49:43 2015 +0200
@@ -314,26 +314,17 @@
   //------------------------------------------------------------------------------
   // frame::adjust_unextended_sp
   private void adjustUnextendedSP() {
-    // If we are returning to a compiled MethodHandle call site, the
-    // saved_fp will in fact be a saved value of the unextended SP.  The
-    // simplest way to tell whether we are returning to such a call site
-    // is as follows:
+    // On x86, sites calling method handle intrinsics and lambda forms are treated
+    // as any other call site. Therefore, no special action is needed when we are
+    // returning to any of these call sites.
 
     CodeBlob cb = cb();
     NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull();
     if (senderNm != null) {
-      // If the sender PC is a deoptimization point, get the original
-      // PC.  For MethodHandle call site the unextended_sp is stored in
-      // saved_fp.
-      if (senderNm.isDeoptMhEntry(getPC())) {
-        // DEBUG_ONLY(verifyDeoptMhOriginalPc(senderNm, getFP()));
-        raw_unextendedSP = getFP();
-      }
-      else if (senderNm.isDeoptEntry(getPC())) {
-        // DEBUG_ONLY(verifyDeoptOriginalPc(senderNm, raw_unextendedSp));
-      }
-      else if (senderNm.isMethodHandleReturn(getPC())) {
-        raw_unextendedSP = getFP();
+      // If the sender PC is a deoptimization point, get the original PC.
+      if (senderNm.isDeoptEntry(getPC()) ||
+          senderNm.isDeoptMhEntry(getPC())) {
+        // DEBUG_ONLY(verifyDeoptOriginalPc(senderNm, raw_unextendedSp));
       }
     }
   }
--- a/hotspot/src/cpu/aarch64/vm/globals_aarch64.hpp	Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/aarch64/vm/globals_aarch64.hpp	Mon Apr 27 10:49:43 2015 +0200
@@ -68,6 +68,8 @@
 
 define_pd_global(bool, UseMembar,            true);
 
+define_pd_global(bool, PreserveFramePointer, false);
+
 // GC Ergo Flags
 define_pd_global(uintx, CMSYoungGenPerWorker, 64*M);  // default max size of CMS young gen, per GC worker thread
 
--- a/hotspot/src/cpu/ppc/vm/globals_ppc.hpp	Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/ppc/vm/globals_ppc.hpp	Mon Apr 27 10:49:43 2015 +0200
@@ -55,6 +55,8 @@
 
 define_pd_global(bool, UseMembar,             false);
 
+define_pd_global(bool, PreserveFramePointer,  false);
+
 // GC Ergo Flags
 define_pd_global(size_t, CMSYoungGenPerWorker, 16*M);  // Default max size of CMS young gen, per GC worker thread.
 
--- a/hotspot/src/cpu/sparc/vm/globals_sparc.hpp	Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/sparc/vm/globals_sparc.hpp	Mon Apr 27 10:49:43 2015 +0200
@@ -74,6 +74,8 @@
 
 define_pd_global(bool, UseMembar,            false);
 
+define_pd_global(bool, PreserveFramePointer, false);
+
 // GC Ergo Flags
 define_pd_global(size_t, CMSYoungGenPerWorker, 16*M);  // default max size of CMS young gen, per GC worker thread
 
--- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp	Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp	Mon Apr 27 10:49:43 2015 +0200
@@ -142,8 +142,10 @@
 
 #endif // _LP64
 
-// JSR 292 fixed register usages:
-REGISTER_DECLARATION(Register, rbp_mh_SP_save, rbp);
+// JSR 292
+// On x86, the SP does not have to be saved when invoking method handle intrinsics
+// or compiled lambda forms. We indicate that by setting rbp_mh_SP_save to noreg.
+REGISTER_DECLARATION(Register, rbp_mh_SP_save, noreg);
 
 // Address is an abstraction used to represent a memory location
 // using any of the amd64 addressing modes with one object.
--- a/hotspot/src/cpu/x86/vm/c1_FrameMap_x86.cpp	Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/x86/vm/c1_FrameMap_x86.cpp	Mon Apr 27 10:49:43 2015 +0200
@@ -343,14 +343,13 @@
   return FrameMap::rsp_opr;
 }
 
-
 // JSR 292
+// On x86, there is no need to save the SP, because neither
+// method handle intrinsics, nor compiled lambda forms modify it.
 LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() {
-  assert(rbp == rbp_mh_SP_save, "must be same register");
-  return rbp_opr;
+  return LIR_OprFact::illegalOpr;
 }
 
-
 bool FrameMap::validate_frame() {
   return true;
 }
--- a/hotspot/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp	Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp	Mon Apr 27 10:49:43 2015 +0200
@@ -360,6 +360,9 @@
   generate_stack_overflow_check(bang_size_in_bytes);
 
   push(rbp);
+  if (PreserveFramePointer) {
+    mov(rbp, rsp);
+  }
 #ifdef TIERED
   // c2 leaves fpu stack dirty. Clean it on entry
   if (UseSSE < 2 ) {
--- a/hotspot/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Mon Apr 27 10:49:43 2015 +0200
@@ -754,14 +754,9 @@
     // WIN64_ONLY: No need to add frame::arg_reg_save_area_bytes to SP
     // since we do a leave anyway.
 
-    // Pop the return address since we are possibly changing SP (restoring from BP).
+    // Pop the return address.
     __ leave();
     __ pop(rcx);
-
-    // Restore SP from BP if the exception PC is a method handle call site.
-    NOT_LP64(__ get_thread(thread);)
-    __ cmpl(Address(thread, JavaThread::is_method_handle_return_offset()), 0);
-    __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save);
     __ jmp(rcx);  // jump to exception handler
     break;
   default:  ShouldNotReachHere();
@@ -832,11 +827,6 @@
   // the pop is also necessary to simulate the effect of a ret(0)
   __ pop(exception_pc);
 
-  // Restore SP from BP if the exception PC is a method handle call site.
-  NOT_LP64(__ get_thread(thread);)
-  __ cmpl(Address(thread, JavaThread::is_method_handle_return_offset()), 0);
-  __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save);
-
   // continue at exception handler (return address removed)
   // note: do *not* remove arguments when unwinding the
   //       activation since the caller assumes having
--- a/hotspot/src/cpu/x86/vm/frame_x86.cpp	Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/x86/vm/frame_x86.cpp	Mon Apr 27 10:49:43 2015 +0200
@@ -224,7 +224,8 @@
     if (sender_blob->is_nmethod()) {
         nmethod* nm = sender_blob->as_nmethod_or_null();
         if (nm != NULL) {
-            if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc)) {
+            if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) ||
+                nm->method()->is_method_handle_intrinsic()) {
                 return false;
             }
         }
@@ -391,10 +392,9 @@
 // frame::verify_deopt_original_pc
 //
 // Verifies the calculated original PC of a deoptimization PC for the
-// given unextended SP.  The unextended SP might also be the saved SP
-// for MethodHandle call sites.
+// given unextended SP.
 #ifdef ASSERT
-void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return) {
+void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp) {
   frame fr;
 
   // This is ugly but it's better than to change {get,set}_original_pc
@@ -404,33 +404,23 @@
 
   address original_pc = nm->get_original_pc(&fr);
   assert(nm->insts_contains(original_pc), "original PC must be in nmethod");
-  assert(nm->is_method_handle_return(original_pc) == is_method_handle_return, "must be");
 }
 #endif
 
 //------------------------------------------------------------------------------
 // frame::adjust_unextended_sp
 void frame::adjust_unextended_sp() {
-  // If we are returning to a compiled MethodHandle call site, the
-  // saved_fp will in fact be a saved value of the unextended SP.  The
-  // simplest way to tell whether we are returning to such a call site
-  // is as follows:
+  // On x86, sites calling method handle intrinsics and lambda forms are treated
+  // as any other call site. Therefore, no special action is needed when we are
+  // returning to any of these call sites.
 
   nmethod* sender_nm = (_cb == NULL) ? NULL : _cb->as_nmethod_or_null();
   if (sender_nm != NULL) {
-    // If the sender PC is a deoptimization point, get the original
-    // PC.  For MethodHandle call site the unextended_sp is stored in
-    // saved_fp.
-    if (sender_nm->is_deopt_mh_entry(_pc)) {
-      DEBUG_ONLY(verify_deopt_mh_original_pc(sender_nm, _fp));
-      _unextended_sp = _fp;
-    }
-    else if (sender_nm->is_deopt_entry(_pc)) {
+    // If the sender PC is a deoptimization point, get the original PC.
+    if (sender_nm->is_deopt_entry(_pc) ||
+        sender_nm->is_deopt_mh_entry(_pc)) {
       DEBUG_ONLY(verify_deopt_original_pc(sender_nm, _unextended_sp));
     }
-    else if (sender_nm->is_method_handle_return(_pc)) {
-      _unextended_sp = _fp;
-    }
   }
 }
 
--- a/hotspot/src/cpu/x86/vm/frame_x86.hpp	Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/x86/vm/frame_x86.hpp	Mon Apr 27 10:49:43 2015 +0200
@@ -76,11 +76,11 @@
 //    [locals and parameters   ]
 //                               <- sender sp
 
-// [1] When the c++ interpreter calls a new method it returns to the frame
+// [1] When the C++ interpreter calls a new method it returns to the frame
 //     manager which allocates a new frame on the stack. In that case there
 //     is no real callee of this newly allocated frame. The frame manager is
-//     aware of the  additional frame(s) and will pop them as nested calls
-//     complete. Howevers tTo make it look good in the debugger the frame
+//     aware of the additional frame(s) and will pop them as nested calls
+//     complete. However, to make it look good in the debugger the frame
 //     manager actually installs a dummy pc pointing to RecursiveInterpreterActivation
 //     with a fake interpreter_state* parameter to make it easy to debug
 //     nested calls.
@@ -88,7 +88,7 @@
 // Note that contrary to the layout for the assembly interpreter the
 // expression stack allocated for the C++ interpreter is full sized.
 // However this is not as bad as it seems as the interpreter frame_manager
-// will truncate the unused space on succesive method calls.
+// will truncate the unused space on successive method calls.
 //
 // ------------------------------ C++ interpreter ----------------------------------------
 
@@ -167,10 +167,7 @@
 
 #ifdef ASSERT
   // Used in frame::sender_for_{interpreter,compiled}_frame
-  static void verify_deopt_original_pc(   nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return = false);
-  static void verify_deopt_mh_original_pc(nmethod* nm, intptr_t* unextended_sp) {
-    verify_deopt_original_pc(nm, unextended_sp, true);
-  }
+  static void verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp);
 #endif
 
  public:
--- a/hotspot/src/cpu/x86/vm/frame_x86.inline.hpp	Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/x86/vm/frame_x86.inline.hpp	Mon Apr 27 10:49:43 2015 +0200
@@ -94,7 +94,7 @@
   // find_blob call. This is also why we can have no asserts on the validity
   // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler
   // -> pd_last_frame should use a specialized version of pd_last_frame which could
-  // call a specilaized frame constructor instead of this one.
+  // call a specialized frame constructor instead of this one.
   // Then we could use the assert below. However this assert is of somewhat dubious
   // value.
   // assert(_pc != NULL, "no pc?");
--- a/hotspot/src/cpu/x86/vm/globals_x86.hpp	Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/x86/vm/globals_x86.hpp	Mon Apr 27 10:49:43 2015 +0200
@@ -82,6 +82,8 @@
 
 define_pd_global(uintx, TypeProfileLevel, 111);
 
+define_pd_global(bool, PreserveFramePointer, false);
+
 #define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \
                                                                             \
   develop(bool, IEEEPrecision, true,                                        \
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp	Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp	Mon Apr 27 10:49:43 2015 +0200
@@ -6090,6 +6090,10 @@
     // We always push rbp, so that on return to interpreter rbp, will be
     // restored correctly and we can correct the stack.
     push(rbp);
+    // Save caller's stack pointer into RBP if the frame pointer is preserved.
+    if (PreserveFramePointer) {
+      mov(rbp, rsp);
+    }
     // Remove word for ebp
     framesize -= wordSize;
 
@@ -6104,6 +6108,11 @@
     // Save RBP register now.
     framesize -= wordSize;
     movptr(Address(rsp, framesize), rbp);
+    // Save caller's stack pointer into RBP if the frame pointer is preserved.
+    if (PreserveFramePointer) {
+      movptr(rbp, rsp);
+      addptr(rbp, framesize + wordSize);
+    }
   }
 
   if (VerifyStackAtCalls) { // Majik cookie to verify stack depth
--- a/hotspot/src/cpu/x86/vm/methodHandles_x86.cpp	Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/x86/vm/methodHandles_x86.cpp	Mon Apr 27 10:49:43 2015 +0200
@@ -374,7 +374,7 @@
     //  member_reg - MemberName that was the trailing argument
     //  temp1_recv_klass - klass of stacked receiver, if needed
     //  rsi/r13 - interpreter linkage (if interpreted)
-    //  rcx, rdx, rsi, rdi, r8, r8 - compiler arguments (if compiled)
+    //  rcx, rdx, rsi, rdi, r8 - compiler arguments (if compiled)
 
     Label L_incompatible_class_change_error;
     switch (iid) {
--- a/hotspot/src/cpu/x86/vm/runtime_x86_32.cpp	Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/x86/vm/runtime_x86_32.cpp	Mon Apr 27 10:49:43 2015 +0200
@@ -126,10 +126,6 @@
 
   // rax: exception handler for given <exception oop/exception pc>
 
-  // Restore SP from BP if the exception PC is a MethodHandle call site.
-  __ cmpl(Address(rcx, JavaThread::is_method_handle_return_offset()), 0);
-  __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save);
-
   // We have a handler in rax, (could be deopt blob)
   // rdx - throwing pc, deopt blob will need it.
 
--- a/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Mon Apr 27 10:49:43 2015 +0200
@@ -3393,8 +3393,8 @@
 
   // Save callee-saved registers.  See x86_64.ad.
 
-  // rbp is an implicitly saved callee saved register (i.e. the calling
-  // convention will save restore it in prolog/epilog) Other than that
+  // rbp is an implicitly saved callee saved register (i.e., the calling
+  // convention will save/restore it in the prolog/epilog). Other than that
   // there are no callee save registers now that adapter frames are gone.
 
   __ movptr(Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt), rbp);
@@ -3436,9 +3436,9 @@
 
   // Restore callee-saved registers
 
-  // rbp is an implicitly saved callee saved register (i.e. the calling
+  // rbp is an implicitly saved callee-saved register (i.e., the calling
   // convention will save restore it in prolog/epilog) Other than that
-  // there are no callee save registers no that adapter frames are gone.
+  // there are no callee save registers now that adapter frames are gone.
 
   __ movptr(rbp, Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt));
 
@@ -3447,10 +3447,6 @@
 
   // rax: exception handler
 
-  // Restore SP from BP if the exception PC is a MethodHandle call site.
-  __ cmpl(Address(r15_thread, JavaThread::is_method_handle_return_offset()), 0);
-  __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save);
-
   // We have a handler in rax (could be deopt blob).
   __ mov(r8, rax);
 
--- a/hotspot/src/cpu/x86/vm/x86.ad	Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/x86/vm/x86.ad	Mon Apr 27 10:49:43 2015 +0200
@@ -930,21 +930,6 @@
 
 encode %{
 
-  enc_class preserve_SP %{
-    debug_only(int off0 = cbuf.insts_size());
-    MacroAssembler _masm(&cbuf);
-    // RBP is preserved across all calls, even compiled calls.
-    // Use it to preserve RSP in places where the callee might change the SP.
-    __ movptr(rbp_mh_SP_save, rsp);
-    debug_only(int off1 = cbuf.insts_size());
-    assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
-  %}
-
-  enc_class restore_SP %{
-    MacroAssembler _masm(&cbuf);
-    __ movptr(rsp, rbp_mh_SP_save);
-  %}
-
   enc_class call_epilog %{
     if (VerifyStackAtCalls) {
       // Check that stack depth is unchanged: find majik cookie on stack
--- a/hotspot/src/cpu/x86/vm/x86_32.ad	Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/x86/vm/x86_32.ad	Mon Apr 27 10:49:43 2015 +0200
@@ -123,50 +123,94 @@
 // 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 // 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 //
+// Class for no registers (empty set).
+reg_class no_reg();
+
 // Class for all registers
-reg_class any_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
+reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
+// Class for all registers (excluding EBP)
+reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
+// Dynamic register class that selects at runtime between register classes
+// any_reg_no_ebp and any_reg_with_ebp (depending on the value of the flag PreserveFramePointer).
+// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
+reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
+
 // Class for general registers
-reg_class int_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
-// Class for general registers which may be used for implicit null checks on win95
-// Also safe for use by tailjump. We don't want to allocate in rbp,
-reg_class int_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX);
+reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
+// Class for general registers (excluding EBP).
+// This register class can be used for implicit null checks on win95.
+// It is also safe for use by tailjumps (we don't want to allocate in ebp).
+// Used also if the PreserveFramePointer flag is true.
+reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
+// Dynamic register class that selects between int_reg_no_ebp and int_reg_with_ebp.
+reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
+
 // Class of "X" registers
 reg_class int_x_reg(EBX, ECX, EDX, EAX);
+
 // Class of registers that can appear in an address with no offset.
 // EBP and ESP require an extra instruction byte for zero offset.
 // Used in fast-unlock
 reg_class p_reg(EDX, EDI, ESI, EBX);
-// Class for general registers not including ECX
-reg_class ncx_reg(EAX, EDX, EBP, EDI, ESI, EBX);
-// Class for general registers not including EAX
+
+// Class for general registers excluding ECX
+reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
+// Class for general registers excluding ECX (and EBP)
+reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
+// Dynamic register class that selects between ncx_reg_no_ebp and ncx_reg_with_ebp.
+reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
+
+// Class for general registers excluding EAX
 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
-// Class for general registers not including EAX or EBX.
-reg_class nabx_reg(EDX, EDI, ESI, ECX, EBP);
+
+// Class for general registers excluding EAX and EBX.
+reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
+// Class for general registers excluding EAX and EBX (and EBP)
+reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
+// Dynamic register class that selects between nabx_reg_no_ebp and nabx_reg_with_ebp.
+reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
+
 // Class of EAX (for multiply and divide operations)
 reg_class eax_reg(EAX);
+
 // Class of EBX (for atomic add)
 reg_class ebx_reg(EBX);
+
 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
 reg_class ecx_reg(ECX);
+
 // Class of EDX (for multiply and divide operations)
 reg_class edx_reg(EDX);
+
 // Class of EDI (for synchronization)
 reg_class edi_reg(EDI);
+
 // Class of ESI (for synchronization)
 reg_class esi_reg(ESI);
-// Singleton class for interpreter's stack pointer
-reg_class ebp_reg(EBP);
+
 // Singleton class for stack pointer
 reg_class sp_reg(ESP);
+
 // Singleton class for instruction pointer
 // reg_class ip_reg(EIP);
+
 // Class of integer register pairs
-reg_class long_reg( EAX,EDX, ECX,EBX, EBP,EDI );
+reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
+// Class of integer register pairs (excluding EBP and EDI).
+reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
+// Dynamic register class that selects between long_reg_no_ebp and long_reg_with_ebp.
+reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
+
 // Class of integer register pairs that aligns with calling convention
 reg_class eadx_reg( EAX,EDX );
 reg_class ebcx_reg( ECX,EBX );
+
 // Not AX or DX, used in divides
-reg_class nadx_reg( EBX,ECX,ESI,EDI,EBP );
+reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
+// Not AX, DX, or EBP; used in divides
+reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
+// Dynamic register class that selects between nadx_reg_no_ebp and nadx_reg_with_ebp.
+reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
 
 // Floating point registers.  Notice FPR0 is not a choice.
 // FPR0 is not ever allocated; we use clever encodings to fake
@@ -240,18 +284,11 @@
   return size;
 }
 
-static int preserve_SP_size() {
-  return 2;  // op, rm(reg/reg)
-}
-
 // !!!!! Special hack to get all type of calls to specify the byte offset
 //       from the start of the call to the point where the return address
 //       will point.
 int MachCallStaticJavaNode::ret_addr_offset() {
-  int offset = 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
-  if (_method_handle_invoke)
-    offset += preserve_SP_size();
-  return offset;
+  return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points  
 }
 
 int MachCallDynamicJavaNode::ret_addr_offset() {
@@ -285,15 +322,6 @@
 
 // The address of the call instruction needs to be 4-byte aligned to
 // ensure that it does not span a cache line so that it can be patched.
-int CallStaticJavaHandleNode::compute_padding(int current_offset) const {
-  current_offset += pre_call_resets_size();  // skip fldcw, if any
-  current_offset += preserve_SP_size();   // skip mov rbp, rsp
-  current_offset += 1;      // skip call opcode byte
-  return round_to(current_offset, alignment_required()) - current_offset;
-}
-
-// The address of the call instruction needs to be 4-byte aligned to
-// ensure that it does not span a cache line so that it can be patched.
 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
   current_offset += pre_call_resets_size();  // skip fldcw, if any
   current_offset += 5;      // skip MOV instruction
@@ -523,6 +551,10 @@
     st->print("# stack bang (%d bytes)", bangsize);
     st->print("\n\t");
     st->print("PUSH   EBP\t# Save EBP");
+    if (PreserveFramePointer) {
+      st->print("\n\t");
+      st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
+    }
     if (framesize) {
       st->print("\n\t");
       st->print("SUB    ESP, #%d\t# Create frame",framesize);
@@ -532,6 +564,10 @@
     st->print("\n\t");
     framesize -= wordSize;
     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
+    if (PreserveFramePointer) {
+      st->print("\n\t");
+      st->print("MOV    EBP, [ESP + #%d]\t# Save the caller's SP into EBP", (framesize + wordSize));
+    }
   }
 
   if (VerifyStackAtCalls) {
@@ -1489,7 +1525,7 @@
 }
 
 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
-  return EBP_REG_mask();
+  return NO_REG_mask();
 }
 
 // Returns true if the high 32 bits of the value is known to be zero.
@@ -3735,7 +3771,7 @@
 
 // On windows95, EBP is not safe to use for implicit null tests.
 operand eRegP_no_EBP() %{
-  constraint(ALLOC_IN_RC(int_reg_no_rbp));
+  constraint(ALLOC_IN_RC(int_reg_no_ebp));
   match(RegP);
   match(eAXRegP);
   match(eBXRegP);
@@ -3824,13 +3860,6 @@
   interface(REG_INTER);
 %}
 
-operand eBPRegP() %{
-  constraint(ALLOC_IN_RC(ebp_reg));
-  match(RegP);
-  format %{ "EBP" %}
-  interface(REG_INTER);
-%}
-
 operand eRegL() %{
   constraint(ALLOC_IN_RC(long_reg));
   match(RegL);
@@ -12615,7 +12644,6 @@
 //       compute_padding() functions will have to be adjusted.
 instruct CallStaticJavaDirect(method meth) %{
   match(CallStaticJava);
-  predicate(! ((CallStaticJavaNode*)n)->is_method_handle_invoke());
   effect(USE meth);
 
   ins_cost(300);
@@ -12629,29 +12657,6 @@
   ins_alignment(4);
 %}
 
-// Call Java Static Instruction (method handle version)
-// Note: If this code changes, the corresponding ret_addr_offset() and
-//       compute_padding() functions will have to be adjusted.
-instruct CallStaticJavaHandle(method meth, eBPRegP ebp_mh_SP_save) %{
-  match(CallStaticJava);
-  predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());
-  effect(USE meth);
-  // EBP is saved by all callees (for interpreter stack correction).
-  // We use it here for a similar purpose, in {preserve,restore}_SP.
-
-  ins_cost(300);
-  format %{ "CALL,static/MethodHandle " %}
-  opcode(0xE8); /* E8 cd */
-  ins_encode( pre_call_resets,
-              preserve_SP,
-              Java_Static_Call( meth ),
-              restore_SP,
-              call_epilog,
-              post_call_FPU );
-  ins_pipe( pipe_slow );
-  ins_alignment(4);
-%}
-
 // Call Java Dynamic Instruction
 // Note: If this code changes, the corresponding ret_addr_offset() and
 //       compute_padding() functions will have to be adjusted.
--- a/hotspot/src/cpu/x86/vm/x86_64.ad	Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/cpu/x86/vm/x86_64.ad	Mon Apr 27 10:49:43 2015 +0200
@@ -166,42 +166,67 @@
 // 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 //
 
-// Class for all pointer registers (including RSP)
-reg_class any_reg(RAX, RAX_H,
-                  RDX, RDX_H,
-                  RBP, RBP_H,
-                  RDI, RDI_H,
-                  RSI, RSI_H,
-                  RCX, RCX_H,
-                  RBX, RBX_H,
-                  RSP, RSP_H,
-                  R8,  R8_H,
-                  R9,  R9_H,
-                  R10, R10_H,
-                  R11, R11_H,
-                  R12, R12_H,
-                  R13, R13_H,
-                  R14, R14_H,
-                  R15, R15_H);
-
-// Class for all pointer registers except RSP
-reg_class ptr_reg(RAX, RAX_H,
-                  RDX, RDX_H,
-                  RBP, RBP_H,
-                  RDI, RDI_H,
-                  RSI, RSI_H,
-                  RCX, RCX_H,
-                  RBX, RBX_H,
-                  R8,  R8_H,
-                  R9,  R9_H,
-                  R10, R10_H,
-                  R11, R11_H,
-                  R13, R13_H,
-                  R14, R14_H);
-
-// Class for all pointer registers except RAX and RSP
-reg_class ptr_no_rax_reg(RDX, RDX_H,
-                         RBP, RBP_H,
+// Empty register class.
+reg_class no_reg();
+
+// Class for all pointer registers (including RSP and RBP)
+reg_class any_reg_with_rbp(RAX, RAX_H,
+                           RDX, RDX_H,
+                           RBP, RBP_H,               
+                           RDI, RDI_H,
+                           RSI, RSI_H,
+                           RCX, RCX_H,
+                           RBX, RBX_H,
+                           RSP, RSP_H,
+                           R8,  R8_H,
+                           R9,  R9_H,
+                           R10, R10_H,
+                           R11, R11_H,
+                           R12, R12_H,
+                           R13, R13_H,
+                           R14, R14_H,
+                           R15, R15_H);
+
+// Class for all pointer registers (including RSP, but excluding RBP)
+reg_class any_reg_no_rbp(RAX, RAX_H,
+                         RDX, RDX_H,                
+                         RDI, RDI_H,
+                         RSI, RSI_H,
+                         RCX, RCX_H,
+                         RBX, RBX_H,
+                         RSP, RSP_H,
+                         R8,  R8_H,
+                         R9,  R9_H,
+                         R10, R10_H,
+                         R11, R11_H,
+                         R12, R12_H,
+                         R13, R13_H,
+                         R14, R14_H,
+                         R15, R15_H);
+
+// Dynamic register class that selects at runtime between register classes
+// any_reg_no_rbp and any_reg_with_rbp (depending on the value of the flag PreserveFramePointer). 
+// Equivalent to: return PreserveFramePointer ? any_reg_no_rbp : any_reg_with_rbp;
+reg_class_dynamic any_reg(any_reg_no_rbp, any_reg_with_rbp, %{ PreserveFramePointer %});
+                  
+// Class for all pointer registers (excluding RSP)
+reg_class ptr_reg_with_rbp(RAX, RAX_H,
+                           RDX, RDX_H,
+                           RBP, RBP_H,
+                           RDI, RDI_H,
+                           RSI, RSI_H,
+                           RCX, RCX_H,
+                           RBX, RBX_H,
+                           R8,  R8_H,
+                           R9,  R9_H,
+                           R10, R10_H,
+                           R11, R11_H,
+                           R13, R13_H,
+                           R14, R14_H);
+
+// Class for all pointer registers (excluding RSP and RBP)
+reg_class ptr_reg_no_rbp(RAX, RAX_H,
+                         RDX, RDX_H,                         
                          RDI, RDI_H,
                          RSI, RSI_H,
                          RCX, RCX_H,
@@ -213,31 +238,66 @@
                          R13, R13_H,
                          R14, R14_H);
 
-reg_class ptr_no_rbp_reg(RDX, RDX_H,
-                         RAX, RAX_H,
-                         RDI, RDI_H,
-                         RSI, RSI_H,
-                         RCX, RCX_H,
-                         RBX, RBX_H,
-                         R8,  R8_H,
-                         R9,  R9_H,
-                         R10, R10_H,
-                         R11, R11_H,
-                         R13, R13_H,
-                         R14, R14_H);
-
-// Class for all pointer registers except RAX, RBX and RSP
-reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
-                             RBP, RBP_H,
-                             RDI, RDI_H,
-                             RSI, RSI_H,
-                             RCX, RCX_H,
-                             R8,  R8_H,
-                             R9,  R9_H,
-                             R10, R10_H,
-                             R11, R11_H,
-                             R13, R13_H,
-                             R14, R14_H);
+// Dynamic register class that selects between ptr_reg_no_rbp and ptr_reg_with_rbp.
+reg_class_dynamic ptr_reg(ptr_reg_no_rbp, ptr_reg_with_rbp, %{ PreserveFramePointer %});
+
+// Class for all pointer registers (excluding RAX and RSP)
+reg_class ptr_no_rax_reg_with_rbp(RDX, RDX_H,
+                                  RBP, RBP_H,
+                                  RDI, RDI_H,
+                                  RSI, RSI_H,
+                                  RCX, RCX_H,
+                                  RBX, RBX_H,
+                                  R8,  R8_H,
+                                  R9,  R9_H,
+                                  R10, R10_H,
+                                  R11, R11_H,
+                                  R13, R13_H,
+                                  R14, R14_H);
+
+// Class for all pointer registers (excluding RAX, RSP, and RBP)
+reg_class ptr_no_rax_reg_no_rbp(RDX, RDX_H,
+                                RDI, RDI_H,
+                                RSI, RSI_H,
+                                RCX, RCX_H,
+                                RBX, RBX_H,
+                                R8,  R8_H,
+                                R9,  R9_H,
+                                R10, R10_H,
+                                R11, R11_H,
+                                R13, R13_H,
+                                R14, R14_H);
+
+// Dynamic register class that selects between ptr_no_rax_reg_no_rbp and ptr_no_rax_reg_with_rbp.
+reg_class_dynamic ptr_no_rax_reg(ptr_no_rax_reg_no_rbp, ptr_no_rax_reg_with_rbp, %{ PreserveFramePointer %});
+
+// Class for all pointer registers (excluding RAX, RBX, and RSP)
+reg_class ptr_no_rax_rbx_reg_with_rbp(RDX, RDX_H,
+                                      RBP, RBP_H,
+                                      RDI, RDI_H,
+                                      RSI, RSI_H,
+                                      RCX, RCX_H,
+                                      R8,  R8_H,
+                                      R9,  R9_H,
+                                      R10, R10_H,
+                                      R11, R11_H,
+                                      R13, R13_H,
+                                      R14, R14_H);
+
+// Class for all pointer registers (excluding RAX, RBX, RSP, and RBP)
+reg_class ptr_no_rax_rbx_reg_no_rbp(RDX, RDX_H,
+                                    RDI, RDI_H,
+                                    RSI, RSI_H,
+                                    RCX, RCX_H,
+                                    R8,  R8_H,
+                                    R9,  R9_H,
+                                    R10, R10_H,
+                                    R11, R11_H,
+                                    R13, R13_H,
+                                    R14, R14_H);
+
+// Dynamic register class that selects between ptr_no_rax_rbx_reg_no_rbp and ptr_no_rax_rbx_reg_with_rbp.
+reg_class_dynamic ptr_no_rax_rbx_reg(ptr_no_rax_rbx_reg_no_rbp, ptr_no_rax_rbx_reg_with_rbp, %{ PreserveFramePointer %});
 
 // Singleton class for RAX pointer register
 reg_class ptr_rax_reg(RAX, RAX_H);
@@ -251,59 +311,29 @@
 // Singleton class for RDI pointer register
 reg_class ptr_rdi_reg(RDI, RDI_H);
 
-// Singleton class for RBP pointer register
-reg_class ptr_rbp_reg(RBP, RBP_H);
-
 // Singleton class for stack pointer
 reg_class ptr_rsp_reg(RSP, RSP_H);
 
 // Singleton class for TLS pointer
 reg_class ptr_r15_reg(R15, R15_H);
 
-// Class for all long registers (except RSP)
-reg_class long_reg(RAX, RAX_H,
-                   RDX, RDX_H,
-                   RBP, RBP_H,
-                   RDI, RDI_H,
-                   RSI, RSI_H,
-                   RCX, RCX_H,
-                   RBX, RBX_H,
-                   R8,  R8_H,
-                   R9,  R9_H,
-                   R10, R10_H,
-                   R11, R11_H,
-                   R13, R13_H,
-                   R14, R14_H);
-
-// Class for all long registers except RAX, RDX (and RSP)
-reg_class long_no_rax_rdx_reg(RBP, RBP_H,
-                              RDI, RDI_H,
-                              RSI, RSI_H,
-                              RCX, RCX_H,
-                              RBX, RBX_H,
-                              R8,  R8_H,
-                              R9,  R9_H,
-                              R10, R10_H,
-                              R11, R11_H,
-                              R13, R13_H,
-                              R14, R14_H);
-
-// Class for all long registers except RCX (and RSP)
-reg_class long_no_rcx_reg(RBP, RBP_H,
-                          RDI, RDI_H,
-                          RSI, RSI_H,
-                          RAX, RAX_H,
-                          RDX, RDX_H,
-                          RBX, RBX_H,
-                          R8,  R8_H,
-                          R9,  R9_H,
-                          R10, R10_H,
-                          R11, R11_H,
-                          R13, R13_H,
-                          R14, R14_H);
-
-// Class for all long registers except RAX (and RSP)
-reg_class long_no_rax_reg(RBP, RBP_H,
+// Class for all long registers (excluding RSP)
+reg_class long_reg_with_rbp(RAX, RAX_H,
+                            RDX, RDX_H,
+                            RBP, RBP_H,
+                            RDI, RDI_H,
+                            RSI, RSI_H,
+                            RCX, RCX_H,
+                            RBX, RBX_H,
+                            R8,  R8_H,
+                            R9,  R9_H,
+                            R10, R10_H,
+                            R11, R11_H,
+                            R13, R13_H,
+                            R14, R14_H);
+
+// Class for all long registers (excluding RSP and RBP)
+reg_class long_reg_no_rbp(RAX, RAX_H,
                           RDX, RDX_H,
                           RDI, RDI_H,
                           RSI, RSI_H,
@@ -316,6 +346,67 @@
                           R13, R13_H,
                           R14, R14_H);
 
+// Dynamic register class that selects between long_reg_no_rbp and long_reg_with_rbp.
+reg_class_dynamic long_reg(long_reg_no_rbp, long_reg_with_rbp, %{ PreserveFramePointer %});
+
+// Class for all long registers (excluding RAX, RDX and RSP)
+reg_class long_no_rax_rdx_reg_with_rbp(RBP, RBP_H,
+                                       RDI, RDI_H,
+                                       RSI, RSI_H,
+                                       RCX, RCX_H,
+                                       RBX, RBX_H,
+                                       R8,  R8_H,
+                                       R9,  R9_H,
+                                       R10, R10_H,
+                                       R11, R11_H,
+                                       R13, R13_H,
+                                       R14, R14_H);
+
+// Class for all long registers (excluding RAX, RDX, RSP, and RBP)
+reg_class long_no_rax_rdx_reg_no_rbp(RDI, RDI_H,
+                                     RSI, RSI_H,
+                                     RCX, RCX_H,
+                                     RBX, RBX_H,
+                                     R8,  R8_H,
+                                     R9,  R9_H,
+                                     R10, R10_H,
+                                     R11, R11_H,
+                                     R13, R13_H,
+                                     R14, R14_H);
+
+// Dynamic register class that selects between long_no_rax_rdx_reg_no_rbp and long_no_rax_rdx_reg_with_rbp.
+reg_class_dynamic long_no_rax_rdx_reg(long_no_rax_rdx_reg_no_rbp, long_no_rax_rdx_reg_with_rbp, %{ PreserveFramePointer %});
+
+// Class for all long registers (excluding RCX and RSP)
+reg_class long_no_rcx_reg_with_rbp(RBP, RBP_H,
+                                   RDI, RDI_H,
+                                   RSI, RSI_H,
+                                   RAX, RAX_H,
+                                   RDX, RDX_H,
+                                   RBX, RBX_H,
+                                   R8,  R8_H,
+                                   R9,  R9_H,
+                                   R10, R10_H,
+                                   R11, R11_H,
+                                   R13, R13_H,
+                                   R14, R14_H);
+
+// Class for all long registers (excluding RCX, RSP, and RBP)
+reg_class long_no_rcx_reg_no_rbp(RDI, RDI_H,
+                                 RSI, RSI_H,
+                                 RAX, RAX_H,
+                                 RDX, RDX_H,
+                                 RBX, RBX_H,
+                                 R8,  R8_H,
+                                 R9,  R9_H,
+                                 R10, R10_H,
+                                 R11, R11_H,
+                                 R13, R13_H,
+                                 R14, R14_H);
+
+// Dynamic register class that selects between long_no_rcx_reg_no_rbp and long_no_rcx_reg_with_rbp.
+reg_class_dynamic long_no_rcx_reg(long_no_rcx_reg_no_rbp, long_no_rcx_reg_with_rbp, %{ PreserveFramePointer %});
+
 // Singleton class for RAX long register
 reg_class long_rax_reg(RAX, RAX_H);
 
@@ -325,27 +416,27 @@
 // Singleton class for RDX long register
 reg_class long_rdx_reg(RDX, RDX_H);
 
-// Class for all int registers (except RSP)
-reg_class int_reg(RAX,
-                  RDX,
-                  RBP,
-                  RDI,
-                  RSI,
-                  RCX,
-                  RBX,
-                  R8,
-                  R9,
-                  R10,
-                  R11,
-                  R13,
-                  R14);
-
-// Class for all int registers except RCX (and RSP)
-reg_class int_no_rcx_reg(RAX,
+// Class for all int registers (excluding RSP)
+reg_class int_reg_with_rbp(RAX,
+                           RDX,
+                           RBP,
+                           RDI,
+                           RSI,
+                           RCX,
+                           RBX,
+                           R8,
+                           R9,
+                           R10,
+                           R11,
+                           R13,
+                           R14);
+
+// Class for all int registers (excluding RSP and RBP)
+reg_class int_reg_no_rbp(RAX,
                          RDX,
-                         RBP,
                          RDI,
                          RSI,
+                         RCX,
                          RBX,
                          R8,
                          R9,
@@ -354,18 +445,66 @@
                          R13,
                          R14);
 
-// Class for all int registers except RAX, RDX (and RSP)
-reg_class int_no_rax_rdx_reg(RBP,
-                             RDI,
-                             RSI,
-                             RCX,
-                             RBX,
-                             R8,
-                             R9,
-                             R10,
-                             R11,
-                             R13,
-                             R14);
+// Dynamic register class that selects between int_reg_no_rbp and int_reg_with_rbp.
+reg_class_dynamic int_reg(int_reg_no_rbp, int_reg_with_rbp, %{ PreserveFramePointer %});
+
+// Class for all int registers (excluding RCX and RSP)
+reg_class int_no_rcx_reg_with_rbp(RAX,
+                                  RDX,
+                                  RBP,
+                                  RDI,
+                                  RSI,
+                                  RBX,
+                                  R8,
+                                  R9,
+                                  R10,
+                                  R11,
+                                  R13,
+                                  R14);
+
+// Class for all int registers (excluding RCX, RSP, and RBP)
+reg_class int_no_rcx_reg_no_rbp(RAX,
+                                RDX,
+                                RDI,
+                                RSI,
+                                RBX,
+                                R8,
+                                R9,
+                                R10,
+                                R11,
+                                R13,
+                                R14);
+
+// Dynamic register class that selects between int_no_rcx_reg_no_rbp and int_no_rcx_reg_with_rbp.
+reg_class_dynamic int_no_rcx_reg(int_no_rcx_reg_no_rbp, int_no_rcx_reg_with_rbp, %{ PreserveFramePointer %});
+
+// Class for all int registers (excluding RAX, RDX, and RSP)
+reg_class int_no_rax_rdx_reg_with_rbp(RBP,
+                                      RDI,
+                                      RSI,
+                                      RCX,
+                                      RBX,
+                                      R8,
+                                      R9,
+                                      R10,
+                                      R11,
+                                      R13,
+                                      R14);
+
+// Class for all int registers (excluding RAX, RDX, RSP, and RBP)
+reg_class int_no_rax_rdx_reg_no_rbp(RDI,
+                                    RSI,
+                                    RCX,
+                                    RBX,
+                                    R8,
+                                    R9,
+                                    R10,
+                                    R11,
+                                    R13,
+                                    R14);
+
+// Dynamic register class that selects between int_no_rax_rdx_reg_no_rbp and int_no_rax_rdx_reg_with_rbp.
+reg_class_dynamic int_no_rax_rdx_reg(int_no_rax_rdx_reg_no_rbp, int_no_rax_rdx_reg_with_rbp, %{ PreserveFramePointer %});
 
 // Singleton class for RAX int register
 reg_class int_rax_reg(RAX);
@@ -396,9 +535,6 @@
 
 #define __ _masm.
 
-static int preserve_SP_size() {
-  return 3;  // rex.w, op, rm(reg/reg)
-}
 static int clear_avx_size() {
   return (Compile::current()->max_vector_size() > 16) ? 3 : 0;  // vzeroupper
 }
@@ -409,9 +545,7 @@
 int MachCallStaticJavaNode::ret_addr_offset()
 {
   int offset = 5; // 5 bytes from start of call to where return address points
-  offset += clear_avx_size();
-  if (_method_handle_invoke)
-    offset += preserve_SP_size();
+  offset += clear_avx_size();  
   return offset;
 }
 
@@ -450,16 +584,6 @@
 
 // The address of the call instruction needs to be 4-byte aligned to
 // ensure that it does not span a cache line so that it can be patched.
-int CallStaticJavaHandleNode::compute_padding(int current_offset) const
-{
-  current_offset += preserve_SP_size();   // skip mov rbp, rsp
-  current_offset += clear_avx_size(); // skip vzeroupper
-  current_offset += 1; // skip call opcode byte
-  return round_to(current_offset, alignment_required()) - current_offset;
-}
-
-// The address of the call instruction needs to be 4-byte aligned to
-// ensure that it does not span a cache line so that it can be patched.
 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 {
   current_offset += clear_avx_size(); // skip vzeroupper
@@ -724,6 +848,10 @@
     st->print("# stack bang (%d bytes)", bangsize);
     st->print("\n\t");
     st->print("pushq   rbp\t# Save rbp");
+    if (PreserveFramePointer) {
+        st->print("\n\t");
+        st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
+    }
     if (framesize) {
       st->print("\n\t");
       st->print("subq    rsp, #%d\t# Create frame",framesize);
@@ -732,7 +860,11 @@
     st->print("subq    rsp, #%d\t# Create frame",framesize);
     st->print("\n\t");
     framesize -= wordSize;
-    st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
+    st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);    
+    if (PreserveFramePointer) {
+      st->print("\n\t");
+      st->print("movq    rbp, [rsp + #%d]\t# Save the caller's SP into rbp", (framesize + wordSize));
+    }
   }
 
   if (VerifyStackAtCalls) {
@@ -1598,8 +1730,9 @@
   return LONG_RDX_REG_mask();
 }
 
+// Register into which SP is saved on method handle invokes. Not used on x86_64.
 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
-  return PTR_RBP_REG_mask();
+    return NO_REG_mask();
 }
 
 %}
@@ -3202,7 +3335,7 @@
 // Pointer Register
 operand any_RegP()
 %{
-  constraint(ALLOC_IN_RC(any_reg));
+  constraint(ALLOC_IN_RC(any_reg));  
   match(RegP);
   match(rax_RegP);
   match(rbx_RegP);
@@ -3224,8 +3357,8 @@
   match(rbx_RegP);
   match(rdi_RegP);
   match(rsi_RegP);
-  match(rbp_RegP);
-  match(r15_RegP);  // See Q&A below about r15_RegP.
+  match(rbp_RegP);  // See Q&A below about
+  match(r15_RegP);  // r15_RegP and rbp_RegP.
 
   format %{ %}
   interface(REG_INTER);
@@ -3241,11 +3374,14 @@
 
 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
-// It's fine for an instruction input which expects rRegP to match a r15_RegP.
+// It's fine for an instruction input that expects rRegP to match a r15_RegP.
 // The output of an instruction is controlled by the allocator, which respects
 // register class masks, not match rules.  Unless an instruction mentions
 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
 // by the allocator as an input.
+// The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
+// RBP is used as a proper frame pointer and is not included in ptr_reg. As a
+// result, RBP is not included in the output of the instruction either.
 
 operand no_rax_RegP()
 %{
@@ -3259,9 +3395,11 @@
   interface(REG_INTER);
 %}
 
+// This operand is not allowed to use RBP even if
+// RBP is not used to hold the frame pointer.
 operand no_rbp_RegP()
 %{
-  constraint(ALLOC_IN_RC(ptr_no_rbp_reg));
+  constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
   match(RegP);
   match(rbx_RegP);
   match(rsi_RegP);
@@ -3338,16 +3476,6 @@
   interface(REG_INTER);
 %}
 
-operand rbp_RegP()
-%{
-  constraint(ALLOC_IN_RC(ptr_rbp_reg));
-  match(RegP);
-  match(rRegP);
-
-  format %{ %}
-  interface(REG_INTER);
-%}
-
 operand r15_RegP()
 %{
   constraint(ALLOC_IN_RC(ptr_r15_reg));
@@ -11410,7 +11538,6 @@
 //       compute_padding() functions will have to be adjusted.
 instruct CallStaticJavaDirect(method meth) %{
   match(CallStaticJava);
-  predicate(!((CallStaticJavaNode*) n)->is_method_handle_invoke());
   effect(USE meth);
 
   ins_cost(300);
@@ -11421,27 +11548,6 @@
   ins_alignment(4);
 %}
 
-// Call Java Static Instruction (method handle version)
-// Note: If this code changes, the corresponding ret_addr_offset() and
-//       compute_padding() functions will have to be adjusted.
-instruct CallStaticJavaHandle(method meth, rbp_RegP rbp_mh_SP_save) %{
-  match(CallStaticJava);
-  predicate(((CallStaticJavaNode*) n)->is_method_handle_invoke());
-  effect(USE meth);
-  // RBP is saved by all callees (for interpreter stack correction).
-  // We use it here for a similar purpose, in {preserve,restore}_SP.
-
-  ins_cost(300);
-  format %{ "call,static/MethodHandle " %}
-  opcode(0xE8); /* E8 cd */
-  ins_encode(clear_avx, preserve_SP,
-             Java_Static_Call(meth),
-             restore_SP,
-             call_epilog);
-  ins_pipe(pipe_slow);
-  ins_alignment(4);
-%}
-
 // Call Java Dynamic Instruction
 // Note: If this code changes, the corresponding ret_addr_offset() and
 //       compute_padding() functions will have to be adjusted.
--- a/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp	Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp	Mon Apr 27 10:49:43 2015 +0200
@@ -4083,7 +4083,7 @@
       ValueType* type = apop()->type();
       if (type->is_constant()) {
         ciMethod* target = type->as_ObjectType()->constant_value()->as_member_name()->get_vmtarget();
-        // If the target is another method handle invoke try recursivly to get
+        // If the target is another method handle invoke, try to recursively get
         // a better target.
         if (target->is_method_handle_intrinsic()) {
           if (try_method_handle_inline(target)) {
--- a/hotspot/src/share/vm/c1/c1_LIR.cpp	Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/share/vm/c1/c1_LIR.cpp	Mon Apr 27 10:49:43 2015 +0200
@@ -458,7 +458,7 @@
 //-------------------visits--------------------------
 
 // complete rework of LIR instruction visitor.
-// The virtual calls for each instruction type is replaced by a big
+// The virtual call for each instruction type is replaced by a big
 // switch that adds the operands for each instruction
 
 void LIR_OpVisitState::visit(LIR_Op* op) {
@@ -825,7 +825,8 @@
       }
 
       if (opJavaCall->_info)                     do_info(opJavaCall->_info);
-      if (opJavaCall->is_method_handle_invoke()) {
+      if (FrameMap::method_handle_invoke_SP_save_opr() != LIR_OprFact::illegalOpr &&
+          opJavaCall->is_method_handle_invoke()) {
         opJavaCall->_method_handle_invoke_SP_save_opr = FrameMap::method_handle_invoke_SP_save_opr();
         do_temp(opJavaCall->_method_handle_invoke_SP_save_opr);
       }
--- a/hotspot/src/share/vm/c1/c1_LIR.hpp	Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/share/vm/c1/c1_LIR.hpp	Mon Apr 27 10:49:43 2015 +0200
@@ -1219,10 +1219,8 @@
   // JSR 292 support.
   bool is_invokedynamic() const                  { return code() == lir_dynamic_call; }
   bool is_method_handle_invoke() const {
-    return
-      method()->is_compiled_lambda_form()  // Java-generated adapter
-      ||
-      method()->is_method_handle_intrinsic();  // JVM-generated MH intrinsic
+    return method()->is_compiled_lambda_form() ||   // Java-generated lambda form
+           method()->is_method_handle_intrinsic();  // JVM-generated MH intrinsic
   }
 
   intptr_t vtable_offset() const {
--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp	Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp	Mon Apr 27 10:49:43 2015 +0200
@@ -2875,7 +2875,7 @@
 //   g) lock result registers and emit call operation
 //
 // Before issuing a call, we must spill-save all values on stack
-// that are in caller-save register. "spill-save" moves thos registers
+// that are in caller-save register. "spill-save" moves those registers
 // either in a free callee-save register or spills them if no free
 // callee save register is available.
 //
@@ -2883,7 +2883,7 @@
 // - if invoked between e) and f), we may lock callee save
 //   register in "spill-save" that destroys the receiver register
 //   before f) is executed
-// - if we rearange the f) to be earlier, by loading %o0, it
+// - if we rearrange f) to be earlier (by loading %o0) it
 //   may destroy a value on the stack that is currently in %o0
 //   and is waiting to be spilled
 // - if we keep the receiver locked while doing spill-save,
@@ -2916,14 +2916,16 @@
   assert(receiver->is_illegal() || receiver->is_equal(LIR_Assembler::receiverOpr()), "must match");
 
   // JSR 292
-  // Preserve the SP over MethodHandle call sites.
+  // Preserve the SP over MethodHandle call sites, if needed.
   ciMethod* target = x->target();
   bool is_method_handle_invoke = (// %%% FIXME: Are both of these relevant?
                                   target->is_method_handle_intrinsic() ||
                                   target->is_compiled_lambda_form());
   if (is_method_handle_invoke) {
     info->set_is_method_handle_invoke(true);
-    __ move(FrameMap::stack_pointer(), FrameMap::method_handle_invoke_SP_save_opr());
+    if(FrameMap::method_handle_invoke_SP_save_opr() != LIR_OprFact::illegalOpr) {
+        __ move(FrameMap::stack_pointer(), FrameMap::method_handle_invoke_SP_save_opr());
+    }
   }
 
   switch (x->code()) {
@@ -2963,8 +2965,9 @@
   }
 
   // JSR 292
-  // Restore the SP after MethodHandle call sites.
-  if (is_method_handle_invoke) {
+  // Restore the SP after MethodHandle call sites, if needed.
+  if (is_method_handle_invoke
+      && FrameMap::method_handle_invoke_SP_save_opr() != LIR_OprFact::illegalOpr) {
     __ move(FrameMap::method_handle_invoke_SP_save_opr(), FrameMap::stack_pointer());
   }
 
--- a/hotspot/src/share/vm/opto/bytecodeInfo.cpp	Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/share/vm/opto/bytecodeInfo.cpp	Mon Apr 27 10:49:43 2015 +0200
@@ -631,11 +631,11 @@
   }
   int max_inline_level_adjust = 0;
   if (caller_jvms->method() != NULL) {
-    if (caller_jvms->method()->is_compiled_lambda_form())
+    if (caller_jvms->method()->is_compiled_lambda_form()) {
       max_inline_level_adjust += 1;  // don't count actions in MH or indy adapter frames
-    else if (callee_method->is_method_handle_intrinsic() ||
-             callee_method->is_compiled_lambda_form()) {
-      max_inline_level_adjust += 1;  // don't count method handle calls from java.lang.invoke implem
+    } else if (callee_method->is_method_handle_intrinsic() ||
+               callee_method->is_compiled_lambda_form()) {
+      max_inline_level_adjust += 1;  // don't count method handle calls from java.lang.invoke implementation
     }
     if (max_inline_level_adjust != 0 && C->print_inlining() && (Verbose || WizardMode)) {
       CompileTask::print_inline_indent(inline_level());
--- a/hotspot/src/share/vm/prims/forte.cpp	Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/share/vm/prims/forte.cpp	Mon Apr 27 10:49:43 2015 +0200
@@ -171,8 +171,27 @@
   // Now do we have a useful PcDesc?
   if (pc_desc == NULL ||
       pc_desc->scope_decode_offset() == DebugInformationRecorder::serialized_null) {
-    // No debug information available for this pc
-    // vframeStream would explode if we try and walk the frames.
+    // No debug information is available for this PC.
+    //
+    // vframeStreamCommon::fill_from_frame() will decode the frame depending
+    // on the state of the thread.
+    //
+    // Case #1: If the thread is in Java (state == _thread_in_Java), then
+    // the vframeStreamCommon object will be filled as if the frame were a native
+    // compiled frame. Therefore, no debug information is needed.
+    //
+    // Case #2: If the thread is in any other state, then two steps will be performed:
+    // - if asserts are enabled, found_bad_method_frame() will be called and
+    //   the assert in found_bad_method_frame() will be triggered;
+    // - if asserts are disabled, the vframeStreamCommon object will be filled
+    //   as if it were a native compiled frame.
+    //
+    // Case #2 is similar to the way interpreter frames are processed in
+    // vframeStreamCommon::fill_from_interpreter_frame in case no valid BCI
+    // was found for an interpreted frame. If asserts are enabled, the assert
+    // in found_bad_method_frame() will be triggered. If asserts are disabled,
+    // the vframeStreamCommon object will be filled afterwards as if the
+    // interpreter were at the point of entering into the method.
     return false;
   }
 
@@ -229,9 +248,10 @@
     // a valid method. Then again we may have caught an interpreter
     // frame in the middle of construction and the bci field is
     // not yet valid.
-
-    *method_p = method;
     if (!method->is_valid_method()) return false;
+    *method_p = method; // If the Method* found is invalid, it is
+                        // ignored by forte_fill_call_trace_given_top().
+                        // So set method_p only if the Method is valid.
 
     address bcp = fr->interpreter_frame_bcp();
     int bci = method->validate_bci_from_bcp(bcp);
@@ -245,18 +265,33 @@
 }
 
 
-// Determine if 'fr' can be used to find an initial Java frame.
-// Return false if it can not find a fully decipherable Java frame
-// (in other words a frame that isn't safe to use in a vframe stream).
-// Obviously if it can't even find a Java frame false will also be returned.
+// Determine if a Java frame can be found starting with the frame 'fr'.
+//
+// Check the return value of find_initial_Java_frame and the value of
+// 'method_p' to decide on how to use the results returned by this method.
+//
+// If 'method_p' is not NULL, an initial Java frame has been found and
+// the stack can be walked starting from that initial frame. In this case,
+// 'method_p' points to the Method that the initial frame belongs to and
+// the initial Java frame is returned in initial_frame_p.
+//
+// find_initial_Java_frame() returns true if a Method has been found (i.e.,
+// 'method_p' is not NULL) and the initial frame that belongs to that Method
+// is decipherable.
 //
-// If we find a Java frame decipherable or not then by definition we have
-// identified a method and that will be returned to the caller via method_p.
-// If we can determine a bci that is returned also. (Hmm is it possible
-// to return a method and bci and still return false? )
+// A frame is considered to be decipherable:
+//
+// - if the frame is a compiled frame and a PCDesc is available;
+//
+// - if the frame is an interpreter frame that is valid or the thread is in
+//   one of the states _thread_in_native, _thread_in_vm, or _thread_blocked.
 //
-// The initial Java frame we find (if any) is return via initial_frame_p.
+// Note that find_initial_Java_frame() can return false even if an initial
+// Java method was found (e.g., there is no PCDesc available for the method).
 //
+// If 'method_p' is NULL, it was not possible to find a Java frame when
+// walking the stack starting from 'fr'. In this case find_initial_Java_frame
+// returns false.
 
 static bool find_initial_Java_frame(JavaThread* thread,
                                     frame* fr,
@@ -276,8 +311,6 @@
   // recognizable to us. This should only happen if we are in a JRT_LEAF
   // or something called by a JRT_LEAF method.
 
-
-
   frame candidate = *fr;
 
   // If the starting frame we were given has no codeBlob associated with
@@ -332,9 +365,11 @@
       nmethod* nm = (nmethod*) candidate.cb();
       *method_p = nm->method();
 
-      // If the frame isn't fully decipherable then the default
-      // value for the bci is a signal that we don't have a bci.
-      // If we have a decipherable frame this bci value will
+      // If the frame is not decipherable, then the value of -1
+      // for the BCI is used to signal that no BCI is available.
+      // Furthermore, the method returns false in this case.
+      //
+      // If a decipherable frame is available, the BCI value will
       // not be used.
 
       *bci_p = -1;
@@ -345,9 +380,9 @@
 
       if (nm->is_native_method()) return true;
 
-      // If it isn't decipherable then we have found a pc that doesn't
-      // have a PCDesc that can get us a bci however we did find
-      // a method
+      // If the frame is not decipherable, then a PC was found
+      // that does not have a PCDesc from which a BCI can be obtained.
+      // Nevertheless, a Method was found.
 
       if (!is_decipherable_compiled_frame(thread, &candidate, nm)) {
         return false;
@@ -356,7 +391,7 @@
       // is_decipherable_compiled_frame may modify candidate's pc
       *initial_frame_p = candidate;
 
-      assert(nm->pc_desc_at(candidate.pc()) != NULL, "if it's decipherable then pc must be valid");
+      assert(nm->pc_desc_at(candidate.pc()) != NULL, "debug information must be available if the frame is decipherable");
 
       return true;
     }
@@ -386,17 +421,17 @@
 
   frame initial_Java_frame;
   Method* method;
-  int bci;
+  int bci = -1; // assume BCI is not available for method
+                // update with correct information if available
   int count;
 
   count = 0;
   assert(trace->frames != NULL, "trace->frames must be non-NULL");
 
-  bool fully_decipherable = find_initial_Java_frame(thd, &top_frame, &initial_Java_frame, &method, &bci);
+  // Walk the stack starting from 'top_frame' and search for an initial Java frame.
+  find_initial_Java_frame(thd, &top_frame, &initial_Java_frame, &method, &bci);
 
-  // The frame might not be walkable but still recovered a method
-  // (e.g. an nmethod with no scope info for the pc)
-
+  // Check if a Java Method has been found.
   if (method == NULL) return;
 
   if (!method->is_valid_method()) {
@@ -404,29 +439,6 @@
     return;
   }
 
-  // We got a Java frame however it isn't fully decipherable
-  // so it won't necessarily be safe to use it for the
-  // initial frame in the vframe stream.
-
-  if (!fully_decipherable) {
-    // Take whatever method the top-frame decoder managed to scrape up.
-    // We look further at the top frame only if non-safepoint
-    // debugging information is available.
-    count++;
-    trace->num_frames = count;
-    trace->frames[0].method_id = method->find_jmethod_id_or_null();
-    if (!method->is_native()) {
-      trace->frames[0].lineno = bci;
-    } else {
-      trace->frames[0].lineno = -3;
-    }
-
-    if (!initial_Java_frame.safe_for_sender(thd)) return;
-
-    RegisterMap map(thd, false);
-    initial_Java_frame = initial_Java_frame.sender(&map);
-  }
-
   vframeStreamForte st(thd, initial_Java_frame, false);
 
   for (; !st.at_end() && count < depth; st.forte_next(), count++) {
--- a/hotspot/src/share/vm/runtime/globals.hpp	Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/share/vm/runtime/globals.hpp	Mon Apr 27 10:49:43 2015 +0200
@@ -3918,7 +3918,11 @@
           "Use locked-tracing when doing event-based tracing")              \
                                                                             \
   diagnostic(bool, UseUnalignedAccesses, false,                             \
-          "Use unaligned memory accesses in sun.misc.Unsafe")
+          "Use unaligned memory accesses in sun.misc.Unsafe")               \
+                                                                            \
+  product_pd(bool, PreserveFramePointer,                                    \
+             "Use the FP register for holding the frame pointer "           \
+             "and not as a general purpose register.")
 
 /*
  *  Macros for factoring of globals
--- a/hotspot/src/share/vm/runtime/sharedRuntime.cpp	Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/share/vm/runtime/sharedRuntime.cpp	Mon Apr 27 10:49:43 2015 +0200
@@ -1179,7 +1179,7 @@
 #endif
 
   // JSR 292 key invariant:
-  // If the resolved method is a MethodHandle invoke target the call
+  // If the resolved method is a MethodHandle invoke target, the call
   // site must be a MethodHandle call site, because the lambda form might tail-call
   // leaving the stack in a state unknown to either caller or callee
   // TODO detune for now but we might need it again
--- a/hotspot/src/share/vm/runtime/vframe.hpp	Wed Apr 22 19:10:03 2015 +0300
+++ b/hotspot/src/share/vm/runtime/vframe.hpp	Mon Apr 27 10:49:43 2015 +0200
@@ -389,12 +389,12 @@
       decode_offset < 0 ||
       decode_offset >= nm()->scopes_data_size()) {
     // 6379830 AsyncGetCallTrace sometimes feeds us wild frames.
-    // If we attempt to read nmethod::scopes_data at serialized_null (== 0),
-    // or if we read some at other crazy offset,
-    // we will decode garbage and make wild references into the heap,
-    // leading to crashes in product mode.
-    // (This isn't airtight, of course, since there are internal
-    // offsets which are also crazy.)
+    // If we read nmethod::scopes_data at serialized_null (== 0)
+    // or if we read at some other invalid offset, invalid values will be decoded.
+    // Based on these values, invalid heap locations could be referenced
+    // that could lead to crashes in product mode.
+    // Therefore, do not use the decode offset if invalid, but fill the frame
+    // as if it were a native compiled frame (no Java-level assumptions).
 #ifdef ASSERT
     if (WizardMode) {
       tty->print_cr("Error in fill_from_frame: pc_desc for "
@@ -514,9 +514,15 @@
   address   bcp    = _frame.interpreter_frame_bcp();
   int       bci    = method->validate_bci_from_bcp(bcp);
   // 6379830 AsyncGetCallTrace sometimes feeds us wild frames.
+  // AsyncGetCallTrace interrupts the VM asynchronously. As a result
+  // it is possible to access an interpreter frame for which
+  // no Java-level information is yet available (e.g., because
+  // the frame was being created when the VM interrupted it).
+  // In this scenario, pretend that the interpreter is at the point
+  // of entering the method.
   if (bci < 0) {
     found_bad_method_frame();
-    bci = 0;  // pretend it's on the point of entering
+    bci = 0;
   }
   _mode   = interpreted_mode;
   _method = method;