src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp
changeset 47216 71c04702a3d5
parent 46727 6e4a84748e2c
child 47787 11b8ac93804c
       
     1 /*
       
     2  * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.
       
     8  *
       
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    12  * version 2 for more details (a copy is included in the LICENSE file that
       
    13  * accompanied this code).
       
    14  *
       
    15  * You should have received a copy of the GNU General Public License version
       
    16  * 2 along with this work; if not, write to the Free Software Foundation,
       
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    18  *
       
    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    20  * or visit www.oracle.com if you need additional information or have any
       
    21  * questions.
       
    22  *
       
    23  */
       
    24 
       
    25 #include "precompiled.hpp"
       
    26 #ifndef _WINDOWS
       
    27 #include "alloca.h"
       
    28 #endif
       
    29 #include "asm/macroAssembler.hpp"
       
    30 #include "asm/macroAssembler.inline.hpp"
       
    31 #include "code/debugInfoRec.hpp"
       
    32 #include "code/icBuffer.hpp"
       
    33 #include "code/vtableStubs.hpp"
       
    34 #include "interpreter/interpreter.hpp"
       
    35 #include "logging/log.hpp"
       
    36 #include "memory/resourceArea.hpp"
       
    37 #include "oops/compiledICHolder.hpp"
       
    38 #include "runtime/sharedRuntime.hpp"
       
    39 #include "runtime/vframeArray.hpp"
       
    40 #include "utilities/align.hpp"
       
    41 #include "vm_version_x86.hpp"
       
    42 #include "vmreg_x86.inline.hpp"
       
    43 #ifdef COMPILER1
       
    44 #include "c1/c1_Runtime1.hpp"
       
    45 #endif
       
    46 #ifdef COMPILER2
       
    47 #include "opto/runtime.hpp"
       
    48 #endif
       
    49 #if INCLUDE_JVMCI
       
    50 #include "jvmci/jvmciJavaClasses.hpp"
       
    51 #endif
       
    52 
       
    53 #define __ masm->
       
    54 
       
    55 const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
       
    56 
       
    57 class SimpleRuntimeFrame {
       
    58 
       
    59   public:
       
    60 
       
    61   // Most of the runtime stubs have this simple frame layout.
       
    62   // This class exists to make the layout shared in one place.
       
    63   // Offsets are for compiler stack slots, which are jints.
       
    64   enum layout {
       
    65     // The frame sender code expects that rbp will be in the "natural" place and
       
    66     // will override any oopMap setting for it. We must therefore force the layout
       
    67     // so that it agrees with the frame sender code.
       
    68     rbp_off = frame::arg_reg_save_area_bytes/BytesPerInt,
       
    69     rbp_off2,
       
    70     return_off, return_off2,
       
    71     framesize
       
    72   };
       
    73 };
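// A worked example of the layout above (a sketch, assuming a target where
// frame::arg_reg_save_area_bytes is 0, e.g. non-Windows x86_64):
//   rbp_off    = 0, rbp_off2    = 1   -> saved rbp occupies slots 0..1
//   return_off = 2, return_off2 = 3   -> return address occupies slots 2..3
//   framesize  = 4 compiler slots     -> 16 bytes, i.e. two 8-byte words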
       
    74 
       
    75 class RegisterSaver {
       
    76   // Capture info about frame layout.  Layout offsets are in jint
       
    77   // units because compiler frame slots are jints.
       
    78 #define XSAVE_AREA_BEGIN 160
       
    79 #define XSAVE_AREA_YMM_BEGIN 576
       
    80 #define XSAVE_AREA_ZMM_BEGIN 1152
       
    81 #define XSAVE_AREA_UPPERBANK 1664
       
    82 #define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
       
    83 #define DEF_YMM_OFFS(regnum) ymm ## regnum ## _off = ymm_off + (regnum)*16/BytesPerInt, ymm ## regnum ## H_off
       
    84 #define DEF_ZMM_OFFS(regnum) zmm ## regnum ## _off = zmm_off + (regnum-16)*64/BytesPerInt, zmm ## regnum ## H_off
       
    85   enum layout {
       
    86     fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area
       
    87     xmm_off       = fpu_state_off + XSAVE_AREA_BEGIN/BytesPerInt,            // offset in fxsave save area
       
    88     DEF_XMM_OFFS(0),
       
    89     DEF_XMM_OFFS(1),
       
    90     // 2..15 are implied in range usage
       
    91     ymm_off = xmm_off + (XSAVE_AREA_YMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
       
    92     DEF_YMM_OFFS(0),
       
    93     DEF_YMM_OFFS(1),
       
    94     // 2..15 are implied in range usage
       
    95     zmm_high = xmm_off + (XSAVE_AREA_ZMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
       
    96     zmm_off = xmm_off + (XSAVE_AREA_UPPERBANK - XSAVE_AREA_BEGIN)/BytesPerInt,
       
    97     DEF_ZMM_OFFS(16),
       
    98     DEF_ZMM_OFFS(17),
       
    99     // 18..31 are implied in range usage
       
   100     fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt),
       
   101     fpu_stateH_end,
       
   102     r15_off, r15H_off,
       
   103     r14_off, r14H_off,
       
   104     r13_off, r13H_off,
       
   105     r12_off, r12H_off,
       
   106     r11_off, r11H_off,
       
   107     r10_off, r10H_off,
       
   108     r9_off,  r9H_off,
       
   109     r8_off,  r8H_off,
       
   110     rdi_off, rdiH_off,
       
   111     rsi_off, rsiH_off,
       
   112     ignore_off, ignoreH_off,  // extra copy of rbp
       
   113     rsp_off, rspH_off,
       
   114     rbx_off, rbxH_off,
       
   115     rdx_off, rdxH_off,
       
   116     rcx_off, rcxH_off,
       
   117     rax_off, raxH_off,
       
   118     // 16-byte stack alignment fill word: see MacroAssembler::push/pop_IU_state
       
   119     align_off, alignH_off,
       
   120     flags_off, flagsH_off,
       
   121     // The frame sender code expects that rbp will be in the "natural" place and
       
   122     // will override any oopMap setting for it. We must therefore force the layout
       
   123     // so that it agrees with the frame sender code.
       
   124     rbp_off, rbpH_off,        // copy of rbp we will restore
       
   125     return_off, returnH_off,  // slot for return address
       
   126     reg_save_size             // size in compiler stack slots
       
   127   };
       
   128 
       
   129  public:
       
   130   static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors = false);
       
   131   static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);
       
   132 
       
   133   // Offsets into the register save area
       
   134   // Used by deoptimization when it is managing result register
       
   135   // values on its own
       
   136 
       
   137   static int rax_offset_in_bytes(void)    { return BytesPerInt * rax_off; }
       
   138   static int rdx_offset_in_bytes(void)    { return BytesPerInt * rdx_off; }
       
   139   static int rbx_offset_in_bytes(void)    { return BytesPerInt * rbx_off; }
       
   140   static int xmm0_offset_in_bytes(void)   { return BytesPerInt * xmm0_off; }
       
   141   static int return_offset_in_bytes(void) { return BytesPerInt * return_off; }
       
   142 
       
   143   // During deoptimization only the result registers need to be restored,
       
   144   // all the other values have already been extracted.
       
   145   static void restore_result_registers(MacroAssembler* masm);
       
   146 };
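// A minimal sketch of how the blob generators later in this file typically
// drive RegisterSaver (the exact arguments vary per blob; this is only an
// illustration, not a definitive sequence):
//
//   int frame_size_in_words;
//   OopMapSet* oop_maps = new OopMapSet();
//   OopMap* map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
//   // ... emit the runtime call and record the oop map at the call's pc offset,
//   // e.g. oop_maps->add_gc_map(pc_offset, map); ...
//   RegisterSaver::restore_live_registers(masm);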
       
   147 
       
   148 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
       
   149   int off = 0;
       
   150   int num_xmm_regs = XMMRegisterImpl::number_of_registers;
       
   151   if (UseAVX < 3) {
       
   152     num_xmm_regs = num_xmm_regs/2;
       
   153   }
       
   154 #if defined(COMPILER2) || INCLUDE_JVMCI
       
   155   if (save_vectors) {
       
   156     assert(UseAVX > 0, "Vectors larger than 16 byte long are supported only with AVX");
       
   157     assert(MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported");
       
   158   }
       
   159 #else
       
   160   assert(!save_vectors, "vectors are generated only by C2 and JVMCI");
       
   161 #endif
       
   162 
       
    163   // Always make the frame size 16-byte aligned; both vector and non-vector stacks are always allocated
       
   164   int frame_size_in_bytes = align_up(reg_save_size*BytesPerInt, num_xmm_regs);
       
   165   // OopMap frame size is in compiler stack slots (jint's) not bytes or words
       
   166   int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
       
   167   // CodeBlob frame size is in words.
       
   168   int frame_size_in_words = frame_size_in_bytes / wordSize;
       
   169   *total_frame_words = frame_size_in_words;
       
   170 
       
   171   // Save registers, fpu state, and flags.
       
   172   // We assume caller has already pushed the return address onto the
       
   173   // stack, so rsp is 8-byte aligned here.
       
    174   // We push rbp twice in this sequence because we want the real rbp
       
   175   // to be under the return like a normal enter.
       
   176 
       
   177   __ enter();          // rsp becomes 16-byte aligned here
       
   178   __ push_CPU_state(); // Push a multiple of 16 bytes
       
   179 
       
    180   // push_CPU_state handles this on EVEX-enabled targets
       
   181   if (save_vectors) {
       
   182     // Save upper half of YMM registers(0..15)
       
   183     int base_addr = XSAVE_AREA_YMM_BEGIN;
       
   184     for (int n = 0; n < 16; n++) {
       
   185       __ vextractf128_high(Address(rsp, base_addr+n*16), as_XMMRegister(n));
       
   186     }
       
   187     if (VM_Version::supports_evex()) {
       
   188       // Save upper half of ZMM registers(0..15)
       
   189       base_addr = XSAVE_AREA_ZMM_BEGIN;
       
   190       for (int n = 0; n < 16; n++) {
       
   191         __ vextractf64x4_high(Address(rsp, base_addr+n*32), as_XMMRegister(n));
       
   192       }
       
   193       // Save full ZMM registers(16..num_xmm_regs)
       
   194       base_addr = XSAVE_AREA_UPPERBANK;
       
   195       off = 0;
       
   196       int vector_len = Assembler::AVX_512bit;
       
   197       for (int n = 16; n < num_xmm_regs; n++) {
       
   198         __ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len);
       
   199       }
       
   200     }
       
   201   } else {
       
   202     if (VM_Version::supports_evex()) {
       
   203       // Save upper bank of ZMM registers(16..31) for double/float usage
       
   204       int base_addr = XSAVE_AREA_UPPERBANK;
       
   205       off = 0;
       
   206       for (int n = 16; n < num_xmm_regs; n++) {
       
   207         __ movsd(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n));
       
   208       }
       
   209     }
       
   210   }
       
   211   __ vzeroupper();
       
   212   if (frame::arg_reg_save_area_bytes != 0) {
       
   213     // Allocate argument register save area
       
   214     __ subptr(rsp, frame::arg_reg_save_area_bytes);
       
   215   }
       
   216 
       
   217   // Set an oopmap for the call site.  This oopmap will map all
       
   218   // oop-registers and debug-info registers as callee-saved.  This
       
   219   // will allow deoptimization at this safepoint to find all possible
       
   220   // debug-info recordings, as well as let GC find all oops.
       
   221 
       
   222   OopMapSet *oop_maps = new OopMapSet();
       
   223   OopMap* map = new OopMap(frame_size_in_slots, 0);
       
   224 
       
   225 #define STACK_OFFSET(x) VMRegImpl::stack2reg((x))
       
   226 
       
   227   map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg());
       
   228   map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg());
       
   229   map->set_callee_saved(STACK_OFFSET( rdx_off ), rdx->as_VMReg());
       
   230   map->set_callee_saved(STACK_OFFSET( rbx_off ), rbx->as_VMReg());
       
   231   // rbp location is known implicitly by the frame sender code, needs no oopmap
       
    232   // and the location where rbp was saved is ignored
       
   233   map->set_callee_saved(STACK_OFFSET( rsi_off ), rsi->as_VMReg());
       
   234   map->set_callee_saved(STACK_OFFSET( rdi_off ), rdi->as_VMReg());
       
   235   map->set_callee_saved(STACK_OFFSET( r8_off  ), r8->as_VMReg());
       
   236   map->set_callee_saved(STACK_OFFSET( r9_off  ), r9->as_VMReg());
       
   237   map->set_callee_saved(STACK_OFFSET( r10_off ), r10->as_VMReg());
       
   238   map->set_callee_saved(STACK_OFFSET( r11_off ), r11->as_VMReg());
       
   239   map->set_callee_saved(STACK_OFFSET( r12_off ), r12->as_VMReg());
       
   240   map->set_callee_saved(STACK_OFFSET( r13_off ), r13->as_VMReg());
       
   241   map->set_callee_saved(STACK_OFFSET( r14_off ), r14->as_VMReg());
       
   242   map->set_callee_saved(STACK_OFFSET( r15_off ), r15->as_VMReg());
       
   243   // For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15,
       
   244   // on EVEX enabled targets, we get it included in the xsave area
       
   245   off = xmm0_off;
       
   246   int delta = xmm1_off - off;
       
   247   for (int n = 0; n < 16; n++) {
       
   248     XMMRegister xmm_name = as_XMMRegister(n);
       
   249     map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg());
       
   250     off += delta;
       
   251   }
       
   252   if(UseAVX > 2) {
       
   253     // Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets
       
   254     off = zmm16_off;
       
   255     delta = zmm17_off - off;
       
   256     for (int n = 16; n < num_xmm_regs; n++) {
       
   257       XMMRegister zmm_name = as_XMMRegister(n);
       
   258       map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg());
       
   259       off += delta;
       
   260     }
       
   261   }
       
   262 
       
   263 #if defined(COMPILER2) || INCLUDE_JVMCI
       
   264   if (save_vectors) {
       
   265     off = ymm0_off;
       
   266     int delta = ymm1_off - off;
       
   267     for (int n = 0; n < 16; n++) {
       
   268       XMMRegister ymm_name = as_XMMRegister(n);
       
   269       map->set_callee_saved(STACK_OFFSET(off), ymm_name->as_VMReg()->next(4));
       
   270       off += delta;
       
   271     }
       
   272   }
       
   273 #endif // COMPILER2 || INCLUDE_JVMCI
       
   274 
       
   275   // %%% These should all be a waste but we'll keep things as they were for now
       
   276   if (true) {
       
   277     map->set_callee_saved(STACK_OFFSET( raxH_off ), rax->as_VMReg()->next());
       
   278     map->set_callee_saved(STACK_OFFSET( rcxH_off ), rcx->as_VMReg()->next());
       
   279     map->set_callee_saved(STACK_OFFSET( rdxH_off ), rdx->as_VMReg()->next());
       
   280     map->set_callee_saved(STACK_OFFSET( rbxH_off ), rbx->as_VMReg()->next());
       
   281     // rbp location is known implicitly by the frame sender code, needs no oopmap
       
   282     map->set_callee_saved(STACK_OFFSET( rsiH_off ), rsi->as_VMReg()->next());
       
   283     map->set_callee_saved(STACK_OFFSET( rdiH_off ), rdi->as_VMReg()->next());
       
   284     map->set_callee_saved(STACK_OFFSET( r8H_off  ), r8->as_VMReg()->next());
       
   285     map->set_callee_saved(STACK_OFFSET( r9H_off  ), r9->as_VMReg()->next());
       
   286     map->set_callee_saved(STACK_OFFSET( r10H_off ), r10->as_VMReg()->next());
       
   287     map->set_callee_saved(STACK_OFFSET( r11H_off ), r11->as_VMReg()->next());
       
   288     map->set_callee_saved(STACK_OFFSET( r12H_off ), r12->as_VMReg()->next());
       
   289     map->set_callee_saved(STACK_OFFSET( r13H_off ), r13->as_VMReg()->next());
       
   290     map->set_callee_saved(STACK_OFFSET( r14H_off ), r14->as_VMReg()->next());
       
   291     map->set_callee_saved(STACK_OFFSET( r15H_off ), r15->as_VMReg()->next());
       
   292     // For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15,
       
   293     // on EVEX enabled targets, we get it included in the xsave area
       
   294     off = xmm0H_off;
       
   295     delta = xmm1H_off - off;
       
   296     for (int n = 0; n < 16; n++) {
       
   297       XMMRegister xmm_name = as_XMMRegister(n);
       
   298       map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()->next());
       
   299       off += delta;
       
   300     }
       
   301     if (UseAVX > 2) {
       
   302       // Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets
       
   303       off = zmm16H_off;
       
   304       delta = zmm17H_off - off;
       
   305       for (int n = 16; n < num_xmm_regs; n++) {
       
   306         XMMRegister zmm_name = as_XMMRegister(n);
       
   307         map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()->next());
       
   308         off += delta;
       
   309       }
       
   310     }
       
   311   }
       
   312 
       
   313   return map;
       
   314 }
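// To summarize, the frame built by save_live_registers looks roughly like this,
// from higher to lower addresses (a sketch; exact sizes depend on
// frame::arg_reg_save_area_bytes and the XSAVE layout constants above):
//
//   [return address]                      <- pushed by the caller
//   [saved rbp]                           <- from enter()
//   [flags]  [alignment word]             <- from push_CPU_state()
//   [GP registers rax..r15, incl. ignored copies of rsp/rbp]
//   [fxsave/xsave image: XMM and, on EVEX targets, ZMM state]
//   [argument register save area, if any] <- rsp on return from this function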
       
   315 
       
   316 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
       
   317   int num_xmm_regs = XMMRegisterImpl::number_of_registers;
       
   318   if (UseAVX < 3) {
       
   319     num_xmm_regs = num_xmm_regs/2;
       
   320   }
       
   321   if (frame::arg_reg_save_area_bytes != 0) {
       
   322     // Pop arg register save area
       
   323     __ addptr(rsp, frame::arg_reg_save_area_bytes);
       
   324   }
       
   325 
       
   326 #if defined(COMPILER2) || INCLUDE_JVMCI
       
   327   if (restore_vectors) {
       
   328     assert(UseAVX > 0, "Vectors larger than 16 byte long are supported only with AVX");
       
   329     assert(MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported");
       
   330   }
       
   331 #else
       
   332   assert(!restore_vectors, "vectors are generated only by C2");
       
   333 #endif
       
   334 
       
   335   __ vzeroupper();
       
   336 
       
    337   // On EVEX-enabled targets everything is handled when the FPU state is popped below
       
   338   if (restore_vectors) {
       
   339     // Restore upper half of YMM registers (0..15)
       
   340     int base_addr = XSAVE_AREA_YMM_BEGIN;
       
   341     for (int n = 0; n < 16; n++) {
       
   342       __ vinsertf128_high(as_XMMRegister(n), Address(rsp, base_addr+n*16));
       
   343     }
       
   344     if (VM_Version::supports_evex()) {
       
   345       // Restore upper half of ZMM registers (0..15)
       
   346       base_addr = XSAVE_AREA_ZMM_BEGIN;
       
   347       for (int n = 0; n < 16; n++) {
       
   348         __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, base_addr+n*32));
       
   349       }
       
   350       // Restore full ZMM registers(16..num_xmm_regs)
       
   351       base_addr = XSAVE_AREA_UPPERBANK;
       
   352       int vector_len = Assembler::AVX_512bit;
       
   353       int off = 0;
       
   354       for (int n = 16; n < num_xmm_regs; n++) {
       
   355         __ evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len);
       
   356       }
       
   357     }
       
   358   } else {
       
   359     if (VM_Version::supports_evex()) {
       
   360       // Restore upper bank of ZMM registers(16..31) for double/float usage
       
   361       int base_addr = XSAVE_AREA_UPPERBANK;
       
   362       int off = 0;
       
   363       for (int n = 16; n < num_xmm_regs; n++) {
       
   364         __ movsd(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)));
       
   365       }
       
   366     }
       
   367   }
       
   368 
       
   369   // Recover CPU state
       
   370   __ pop_CPU_state();
       
   371   // Get the rbp described implicitly by the calling convention (no oopMap)
       
   372   __ pop(rbp);
       
   373 }
       
   374 
       
   375 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
       
   376 
       
    377   // Just restore the result registers. Only used by deoptimization. By
       
   378   // now any callee save register that needs to be restored to a c2
       
   379   // caller of the deoptee has been extracted into the vframeArray
       
   380   // and will be stuffed into the c2i adapter we create for later
       
    381   // restoration, so only result registers need to be restored here.
       
   382 
       
   383   // Restore fp result register
       
   384   __ movdbl(xmm0, Address(rsp, xmm0_offset_in_bytes()));
       
   385   // Restore integer result register
       
   386   __ movptr(rax, Address(rsp, rax_offset_in_bytes()));
       
   387   __ movptr(rdx, Address(rsp, rdx_offset_in_bytes()));
       
   388 
       
    389   // Pop all of the register save area off the stack except the return address
       
   390   __ addptr(rsp, return_offset_in_bytes());
       
   391 }
       
   392 
       
    393 // Is the vector's size (in bytes) bigger than the size saved by default?
       
    394 // 16-byte XMM registers are saved by default using fxsave/fxrstor instructions.
       
   395 bool SharedRuntime::is_wide_vector(int size) {
       
   396   return size > 16;
       
   397 }
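// i.e. 32-byte YMM and 64-byte ZMM vectors are "wide"; the 16-byte XMM state is
// already covered by the default fxsave/fxrstor image mentioned above.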
       
   398 
       
   399 size_t SharedRuntime::trampoline_size() {
       
   400   return 16;
       
   401 }
       
   402 
       
   403 void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) {
       
   404   __ jump(RuntimeAddress(destination));
       
   405 }
       
   406 
       
   407 // The java_calling_convention describes stack locations as ideal slots on
       
   408 // a frame with no abi restrictions. Since we must observe abi restrictions
       
   409 // (like the placement of the register window) the slots must be biased by
       
   410 // the following value.
       
   411 static int reg2offset_in(VMReg r) {
       
   412   // Account for saved rbp and return address
       
   413   // This should really be in_preserve_stack_slots
       
   414   return (r->reg2stack() + 4) * VMRegImpl::stack_slot_size;
       
   415 }
       
   416 
       
   417 static int reg2offset_out(VMReg r) {
       
   418   return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
       
   419 }
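// A worked example of the bias applied by reg2offset_in: incoming stack slot 0
// (the first stack-passed argument) lives just above the saved rbp and return
// address, i.e. at rbp + (0 + 4) * 4 = rbp + 16; slot 2 is the next 8-byte
// argument word at rbp + 24, and so on.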
       
   420 
       
   421 // ---------------------------------------------------------------------------
       
   422 // Read the array of BasicTypes from a signature, and compute where the
       
   423 // arguments should go.  Values in the VMRegPair regs array refer to 4-byte
       
   424 // quantities.  Values less than VMRegImpl::stack0 are registers, those above
       
   425 // refer to 4-byte stack slots.  All stack slots are based off of the stack pointer
       
   426 // as framesizes are fixed.
       
   427 // VMRegImpl::stack0 refers to the first slot 0(sp).
       
   428 // and VMRegImpl::stack0+1 refers to the memory word 4-byes higher.  Register
       
   429 // up to RegisterImpl::number_of_registers) are the 64-bit
       
   430 // integer registers.
       
   431 
       
   432 // Note: the INPUTS in sig_bt are in units of Java argument words, which are
       
   433 // either 32-bit or 64-bit depending on the build.  The OUTPUTS are in 32-bit
       
   434 // units regardless of build. Of course for i486 there is no 64 bit build
       
   435 
       
   436 // The Java calling convention is a "shifted" version of the C ABI.
       
   437 // By skipping the first C ABI register we can call non-static jni methods
       
   438 // with small numbers of arguments without having to shuffle the arguments
       
   439 // at all. Since we control the java ABI we ought to at least get some
       
   440 // advantage out of it.
       
   441 
       
   442 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
       
   443                                            VMRegPair *regs,
       
   444                                            int total_args_passed,
       
   445                                            int is_outgoing) {
       
   446 
       
   447   // Create the mapping between argument positions and
       
   448   // registers.
       
   449   static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = {
       
   450     j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5
       
   451   };
       
   452   static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_j] = {
       
   453     j_farg0, j_farg1, j_farg2, j_farg3,
       
   454     j_farg4, j_farg5, j_farg6, j_farg7
       
   455   };
       
   456 
       
   457 
       
   458   uint int_args = 0;
       
   459   uint fp_args = 0;
       
   460   uint stk_args = 0; // inc by 2 each time
       
   461 
       
   462   for (int i = 0; i < total_args_passed; i++) {
       
   463     switch (sig_bt[i]) {
       
   464     case T_BOOLEAN:
       
   465     case T_CHAR:
       
   466     case T_BYTE:
       
   467     case T_SHORT:
       
   468     case T_INT:
       
   469       if (int_args < Argument::n_int_register_parameters_j) {
       
   470         regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
       
   471       } else {
       
   472         regs[i].set1(VMRegImpl::stack2reg(stk_args));
       
   473         stk_args += 2;
       
   474       }
       
   475       break;
       
   476     case T_VOID:
       
   477       // halves of T_LONG or T_DOUBLE
       
   478       assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
       
   479       regs[i].set_bad();
       
   480       break;
       
   481     case T_LONG:
       
   482       assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
       
   483       // fall through
       
   484     case T_OBJECT:
       
   485     case T_ARRAY:
       
   486     case T_ADDRESS:
       
   487       if (int_args < Argument::n_int_register_parameters_j) {
       
   488         regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
       
   489       } else {
       
   490         regs[i].set2(VMRegImpl::stack2reg(stk_args));
       
   491         stk_args += 2;
       
   492       }
       
   493       break;
       
   494     case T_FLOAT:
       
   495       if (fp_args < Argument::n_float_register_parameters_j) {
       
   496         regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
       
   497       } else {
       
   498         regs[i].set1(VMRegImpl::stack2reg(stk_args));
       
   499         stk_args += 2;
       
   500       }
       
   501       break;
       
   502     case T_DOUBLE:
       
   503       assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
       
   504       if (fp_args < Argument::n_float_register_parameters_j) {
       
   505         regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
       
   506       } else {
       
   507         regs[i].set2(VMRegImpl::stack2reg(stk_args));
       
   508         stk_args += 2;
       
   509       }
       
   510       break;
       
   511     default:
       
   512       ShouldNotReachHere();
       
   513       break;
       
   514     }
       
   515   }
       
   516 
       
   517   return align_up(stk_args, 2);
       
   518 }
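// A worked example of the mapping above (purely illustrative): for a Java
// signature (long, Object, double, float), sig_bt is
//   { T_LONG, T_VOID, T_OBJECT, T_DOUBLE, T_VOID, T_FLOAT }
// and the loop assigns
//   T_LONG   -> j_rarg0 (set2),  T_VOID -> bad,
//   T_OBJECT -> j_rarg1 (set2),
//   T_DOUBLE -> j_farg0 (set2),  T_VOID -> bad,
//   T_FLOAT  -> j_farg1 (set1),
// with stk_args == 0, so the method needs no outgoing stack slots.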
       
   519 
       
   520 // Patch the callers callsite with entry to compiled code if it exists.
       
   521 static void patch_callers_callsite(MacroAssembler *masm) {
       
   522   Label L;
       
   523   __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
       
   524   __ jcc(Assembler::equal, L);
       
   525 
       
   526   // Save the current stack pointer
       
   527   __ mov(r13, rsp);
       
   528   // Schedule the branch target address early.
       
   529   // Call into the VM to patch the caller, then jump to compiled callee
       
   530   // rax isn't live so capture return address while we easily can
       
   531   __ movptr(rax, Address(rsp, 0));
       
   532 
       
   533   // align stack so push_CPU_state doesn't fault
       
   534   __ andptr(rsp, -(StackAlignmentInBytes));
       
   535   __ push_CPU_state();
       
   536   __ vzeroupper();
       
   537   // VM needs caller's callsite
       
   538   // VM needs target method
       
   539   // This needs to be a long call since we will relocate this adapter to
       
   540   // the codeBuffer and it may not reach
       
   541 
       
   542   // Allocate argument register save area
       
   543   if (frame::arg_reg_save_area_bytes != 0) {
       
   544     __ subptr(rsp, frame::arg_reg_save_area_bytes);
       
   545   }
       
   546   __ mov(c_rarg0, rbx);
       
   547   __ mov(c_rarg1, rax);
       
   548   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));
       
   549 
       
   550   // De-allocate argument register save area
       
   551   if (frame::arg_reg_save_area_bytes != 0) {
       
   552     __ addptr(rsp, frame::arg_reg_save_area_bytes);
       
   553   }
       
   554 
       
   555   __ vzeroupper();
       
   556   __ pop_CPU_state();
       
   557   // restore sp
       
   558   __ mov(rsp, r13);
       
   559   __ bind(L);
       
   560 }
       
   561 
       
   562 
       
   563 static void gen_c2i_adapter(MacroAssembler *masm,
       
   564                             int total_args_passed,
       
   565                             int comp_args_on_stack,
       
   566                             const BasicType *sig_bt,
       
   567                             const VMRegPair *regs,
       
   568                             Label& skip_fixup) {
       
   569   // Before we get into the guts of the C2I adapter, see if we should be here
       
   570   // at all.  We've come from compiled code and are attempting to jump to the
       
   571   // interpreter, which means the caller made a static call to get here
       
   572   // (vcalls always get a compiled target if there is one).  Check for a
       
   573   // compiled target.  If there is one, we need to patch the caller's call.
       
   574   patch_callers_callsite(masm);
       
   575 
       
   576   __ bind(skip_fixup);
       
   577 
       
   578   // Since all args are passed on the stack, total_args_passed *
       
    579   // Interpreter::stackElementSize is the space we need. Plus one word because
       
   580   // we also account for the return address location since
       
    581   // we store it first rather than holding it in rax across all the shuffling.
       
   582 
       
   583   int extraspace = (total_args_passed * Interpreter::stackElementSize) + wordSize;
       
   584 
       
   585   // stack is aligned, keep it that way
       
   586   extraspace = align_up(extraspace, 2*wordSize);
       
   587 
       
   588   // Get return address
       
   589   __ pop(rax);
       
   590 
       
   591   // set senderSP value
       
   592   __ mov(r13, rsp);
       
   593 
       
   594   __ subptr(rsp, extraspace);
       
   595 
       
   596   // Store the return address in the expected location
       
   597   __ movptr(Address(rsp, 0), rax);
       
   598 
       
   599   // Now write the args into the outgoing interpreter space
       
   600   for (int i = 0; i < total_args_passed; i++) {
       
   601     if (sig_bt[i] == T_VOID) {
       
   602       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
       
   603       continue;
       
   604     }
       
   605 
       
   606     // offset to start parameters
       
   607     int st_off   = (total_args_passed - i) * Interpreter::stackElementSize;
       
   608     int next_off = st_off - Interpreter::stackElementSize;
       
   609 
       
   610     // Say 4 args:
       
   611     // i   st_off
       
   612     // 0   32 T_LONG
       
   613     // 1   24 T_VOID
       
   614     // 2   16 T_OBJECT
       
   615     // 3    8 T_BOOL
       
   616     // -    0 return address
       
   617     //
       
    618     // However, to make things extra confusing: because we can fit a long/double in
       
    619     // a single slot on a 64-bit VM and it would be silly to break them up, the interpreter
       
   620     // leaves one slot empty and only stores to a single slot. In this case the
       
    621     // slot that is occupied is the T_VOID slot. See, I said it was confusing.
       
   622 
       
   623     VMReg r_1 = regs[i].first();
       
   624     VMReg r_2 = regs[i].second();
       
   625     if (!r_1->is_valid()) {
       
   626       assert(!r_2->is_valid(), "");
       
   627       continue;
       
   628     }
       
   629     if (r_1->is_stack()) {
       
   630       // memory to memory use rax
       
   631       int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
       
   632       if (!r_2->is_valid()) {
       
   633         // sign extend??
       
   634         __ movl(rax, Address(rsp, ld_off));
       
   635         __ movptr(Address(rsp, st_off), rax);
       
   636 
       
   637       } else {
       
   638 
       
   639         __ movq(rax, Address(rsp, ld_off));
       
   640 
       
    641         // Two VMRegs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
       
   642         // T_DOUBLE and T_LONG use two slots in the interpreter
       
   643         if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
       
   644           // ld_off == LSW, ld_off+wordSize == MSW
       
   645           // st_off == MSW, next_off == LSW
       
   646           __ movq(Address(rsp, next_off), rax);
       
   647 #ifdef ASSERT
       
   648           // Overwrite the unused slot with known junk
       
   649           __ mov64(rax, CONST64(0xdeadffffdeadaaaa));
       
   650           __ movptr(Address(rsp, st_off), rax);
       
   651 #endif /* ASSERT */
       
   652         } else {
       
   653           __ movq(Address(rsp, st_off), rax);
       
   654         }
       
   655       }
       
   656     } else if (r_1->is_Register()) {
       
   657       Register r = r_1->as_Register();
       
   658       if (!r_2->is_valid()) {
       
    659         // must be only an int (or smaller) so move only 32 bits to the slot
       
   660         // why not sign extend??
       
   661         __ movl(Address(rsp, st_off), r);
       
   662       } else {
       
    663         // Two VMRegs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
       
   664         // T_DOUBLE and T_LONG use two slots in the interpreter
       
   665         if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
       
   666           // long/double in gpr
       
   667 #ifdef ASSERT
       
   668           // Overwrite the unused slot with known junk
       
   669           __ mov64(rax, CONST64(0xdeadffffdeadaaab));
       
   670           __ movptr(Address(rsp, st_off), rax);
       
   671 #endif /* ASSERT */
       
   672           __ movq(Address(rsp, next_off), r);
       
   673         } else {
       
   674           __ movptr(Address(rsp, st_off), r);
       
   675         }
       
   676       }
       
   677     } else {
       
   678       assert(r_1->is_XMMRegister(), "");
       
   679       if (!r_2->is_valid()) {
       
    680         // only a float; use just part of the slot
       
   681         __ movflt(Address(rsp, st_off), r_1->as_XMMRegister());
       
   682       } else {
       
   683 #ifdef ASSERT
       
   684         // Overwrite the unused slot with known junk
       
   685         __ mov64(rax, CONST64(0xdeadffffdeadaaac));
       
   686         __ movptr(Address(rsp, st_off), rax);
       
   687 #endif /* ASSERT */
       
   688         __ movdbl(Address(rsp, next_off), r_1->as_XMMRegister());
       
   689       }
       
   690     }
       
   691   }
       
   692 
       
   693   // Schedule the branch target address early.
       
   694   __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
       
   695   __ jmp(rcx);
       
   696 }
       
   697 
       
   698 static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
       
   699                         address code_start, address code_end,
       
   700                         Label& L_ok) {
       
   701   Label L_fail;
       
   702   __ lea(temp_reg, ExternalAddress(code_start));
       
   703   __ cmpptr(pc_reg, temp_reg);
       
   704   __ jcc(Assembler::belowEqual, L_fail);
       
   705   __ lea(temp_reg, ExternalAddress(code_end));
       
   706   __ cmpptr(pc_reg, temp_reg);
       
   707   __ jcc(Assembler::below, L_ok);
       
   708   __ bind(L_fail);
       
   709 }
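// range_check branches to L_ok only when code_start < pc_reg < code_end;
// otherwise it falls through (via L_fail) to whatever the caller emits next,
// which in gen_i2c_adapter below is either the next range_check or the stop.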
       
   710 
       
   711 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
       
   712                                     int total_args_passed,
       
   713                                     int comp_args_on_stack,
       
   714                                     const BasicType *sig_bt,
       
   715                                     const VMRegPair *regs) {
       
   716 
       
   717   // Note: r13 contains the senderSP on entry. We must preserve it since
       
   718   // we may do a i2c -> c2i transition if we lose a race where compiled
       
   719   // code goes non-entrant while we get args ready.
       
   720   // In addition we use r13 to locate all the interpreter args as
       
   721   // we must align the stack to 16 bytes on an i2c entry else we
       
   722   // lose alignment we expect in all compiled code and register
       
   723   // save code can segv when fxsave instructions find improperly
       
   724   // aligned stack pointer.
       
   725 
       
   726   // Adapters can be frameless because they do not require the caller
       
   727   // to perform additional cleanup work, such as correcting the stack pointer.
       
   728   // An i2c adapter is frameless because the *caller* frame, which is interpreted,
       
   729   // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
       
   730   // even if a callee has modified the stack pointer.
       
   731   // A c2i adapter is frameless because the *callee* frame, which is interpreted,
       
   732   // routinely repairs its caller's stack pointer (from sender_sp, which is set
       
   733   // up via the senderSP register).
       
   734   // In other words, if *either* the caller or callee is interpreted, we can
       
   735   // get the stack pointer repaired after a call.
       
   736   // This is why c2i and i2c adapters cannot be indefinitely composed.
       
   737   // In particular, if a c2i adapter were to somehow call an i2c adapter,
       
   738   // both caller and callee would be compiled methods, and neither would
       
   739   // clean up the stack pointer changes performed by the two adapters.
       
   740   // If this happens, control eventually transfers back to the compiled
       
   741   // caller, but with an uncorrected stack, causing delayed havoc.
       
   742 
       
   743   // Pick up the return address
       
   744   __ movptr(rax, Address(rsp, 0));
       
   745 
       
   746   if (VerifyAdapterCalls &&
       
   747       (Interpreter::code() != NULL || StubRoutines::code1() != NULL)) {
       
   748     // So, let's test for cascading c2i/i2c adapters right now.
       
   749     //  assert(Interpreter::contains($return_addr) ||
       
   750     //         StubRoutines::contains($return_addr),
       
   751     //         "i2c adapter must return to an interpreter frame");
       
   752     __ block_comment("verify_i2c { ");
       
   753     Label L_ok;
       
   754     if (Interpreter::code() != NULL)
       
   755       range_check(masm, rax, r11,
       
   756                   Interpreter::code()->code_start(), Interpreter::code()->code_end(),
       
   757                   L_ok);
       
   758     if (StubRoutines::code1() != NULL)
       
   759       range_check(masm, rax, r11,
       
   760                   StubRoutines::code1()->code_begin(), StubRoutines::code1()->code_end(),
       
   761                   L_ok);
       
   762     if (StubRoutines::code2() != NULL)
       
   763       range_check(masm, rax, r11,
       
   764                   StubRoutines::code2()->code_begin(), StubRoutines::code2()->code_end(),
       
   765                   L_ok);
       
   766     const char* msg = "i2c adapter must return to an interpreter frame";
       
   767     __ block_comment(msg);
       
   768     __ stop(msg);
       
   769     __ bind(L_ok);
       
    770     __ block_comment("} verify_i2c ");
       
   771   }
       
   772 
       
   773   // Must preserve original SP for loading incoming arguments because
       
   774   // we need to align the outgoing SP for compiled code.
       
   775   __ movptr(r11, rsp);
       
   776 
       
    777   // Cut-out for having no stack args.  Since the first six int/oop args are passed
       
   778   // in registers, we will occasionally have no stack args.
       
   779   int comp_words_on_stack = 0;
       
   780   if (comp_args_on_stack) {
       
   781     // Sig words on the stack are greater-than VMRegImpl::stack0.  Those in
       
   782     // registers are below.  By subtracting stack0, we either get a negative
       
   783     // number (all values in registers) or the maximum stack slot accessed.
       
   784 
       
   785     // Convert 4-byte c2 stack slots to words.
       
   786     comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
       
    787     // Round up to minimum stack alignment, in wordSize units
       
   788     comp_words_on_stack = align_up(comp_words_on_stack, 2);
       
   789     __ subptr(rsp, comp_words_on_stack * wordSize);
       
   790   }
       
   791 
       
   792 
       
   793   // Ensure compiled code always sees stack at proper alignment
       
   794   __ andptr(rsp, -16);
       
   795 
       
    796   // Push the return address, misaligning the stack so that the youngest frame always sees
       
    797   // the layout it expects as far as the placement of the call instruction is concerned
       
   798   __ push(rax);
       
   799 
       
   800   // Put saved SP in another register
       
   801   const Register saved_sp = rax;
       
   802   __ movptr(saved_sp, r11);
       
   803 
       
   804   // Will jump to the compiled code just as if compiled code was doing it.
       
   805   // Pre-load the register-jump target early, to schedule it better.
       
   806   __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_offset())));
       
   807 
       
   808 #if INCLUDE_JVMCI
       
   809   if (EnableJVMCI || UseAOT) {
       
   810     // check if this call should be routed towards a specific entry point
       
   811     __ cmpptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
       
   812     Label no_alternative_target;
       
   813     __ jcc(Assembler::equal, no_alternative_target);
       
   814     __ movptr(r11, Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
       
   815     __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
       
   816     __ bind(no_alternative_target);
       
   817   }
       
   818 #endif // INCLUDE_JVMCI
       
   819 
       
   820   // Now generate the shuffle code.  Pick up all register args and move the
       
   821   // rest through the floating point stack top.
       
   822   for (int i = 0; i < total_args_passed; i++) {
       
   823     if (sig_bt[i] == T_VOID) {
       
   824       // Longs and doubles are passed in native word order, but misaligned
       
   825       // in the 32-bit build.
       
   826       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
       
   827       continue;
       
   828     }
       
   829 
       
   830     // Pick up 0, 1 or 2 words from SP+offset.
       
   831 
       
   832     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
       
   833             "scrambled load targets?");
       
   834     // Load in argument order going down.
       
   835     int ld_off = (total_args_passed - i)*Interpreter::stackElementSize;
       
   836     // Point to interpreter value (vs. tag)
       
   837     int next_off = ld_off - Interpreter::stackElementSize;
       
   838     //
       
   839     //
       
   840     //
       
   841     VMReg r_1 = regs[i].first();
       
   842     VMReg r_2 = regs[i].second();
       
   843     if (!r_1->is_valid()) {
       
   844       assert(!r_2->is_valid(), "");
       
   845       continue;
       
   846     }
       
   847     if (r_1->is_stack()) {
       
   848       // Convert stack slot to an SP offset (+ wordSize to account for return address )
       
   849       int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;
       
   850 
       
   851       // We can use r13 as a temp here because compiled code doesn't need r13 as an input
       
    852       // and if we end up going through a c2i because of a miss, a reasonable value of r13
       
   853       // will be generated.
       
   854       if (!r_2->is_valid()) {
       
   855         // sign extend???
       
   856         __ movl(r13, Address(saved_sp, ld_off));
       
   857         __ movptr(Address(rsp, st_off), r13);
       
   858       } else {
       
   859         //
       
   860         // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
       
    861         // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case
       
   862         // So we must adjust where to pick up the data to match the interpreter.
       
   863         //
       
    864         // Interpreter local[n] == MSW, local[n+1] == LSW; however, locals
       
    865         // are accessed with negative offsets, so the LSW is at the LOW address
       
   866 
       
   867         // ld_off is MSW so get LSW
       
   868         const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
       
   869                            next_off : ld_off;
       
   870         __ movq(r13, Address(saved_sp, offset));
       
   871         // st_off is LSW (i.e. reg.first())
       
   872         __ movq(Address(rsp, st_off), r13);
       
   873       }
       
   874     } else if (r_1->is_Register()) {  // Register argument
       
   875       Register r = r_1->as_Register();
       
   876       assert(r != rax, "must be different");
       
   877       if (r_2->is_valid()) {
       
   878         //
       
   879         // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
       
    880         // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case
       
   881         // So we must adjust where to pick up the data to match the interpreter.
       
   882 
       
   883         const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
       
   884                            next_off : ld_off;
       
   885 
       
   886         // this can be a misaligned move
       
   887         __ movq(r, Address(saved_sp, offset));
       
   888       } else {
       
   889         // sign extend and use a full word?
       
   890         __ movl(r, Address(saved_sp, ld_off));
       
   891       }
       
   892     } else {
       
   893       if (!r_2->is_valid()) {
       
   894         __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));
       
   895       } else {
       
   896         __ movdbl(r_1->as_XMMRegister(), Address(saved_sp, next_off));
       
   897       }
       
   898     }
       
   899   }
       
   900 
       
   901   // 6243940 We might end up in handle_wrong_method if
       
   902   // the callee is deoptimized as we race thru here. If that
       
   903   // happens we don't want to take a safepoint because the
       
   904   // caller frame will look interpreted and arguments are now
       
   905   // "compiled" so it is much better to make this transition
       
   906   // invisible to the stack walking code. Unfortunately if
       
   907   // we try and find the callee by normal means a safepoint
       
   908   // is possible. So we stash the desired callee in the thread
       
    909   // and the VM will find it there should this case occur.
       
   910 
       
   911   __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx);
       
   912 
       
    913   // put Method* where a c2i would expect it, should we end up there;
       
    914   // only needed because c2 resolve stubs return the Method* as a result in
       
   915   // rax
       
   916   __ mov(rax, rbx);
       
   917   __ jmp(r11);
       
   918 }
       
   919 
       
   920 // ---------------------------------------------------------------
       
   921 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
       
   922                                                             int total_args_passed,
       
   923                                                             int comp_args_on_stack,
       
   924                                                             const BasicType *sig_bt,
       
   925                                                             const VMRegPair *regs,
       
   926                                                             AdapterFingerPrint* fingerprint) {
       
   927   address i2c_entry = __ pc();
       
   928 
       
   929   gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
       
   930 
       
   931   // -------------------------------------------------------------------------
       
   932   // Generate a C2I adapter.  On entry we know rbx holds the Method* during calls
       
   933   // to the interpreter.  The args start out packed in the compiled layout.  They
       
   934   // need to be unpacked into the interpreter layout.  This will almost always
       
   935   // require some stack space.  We grow the current (compiled) stack, then repack
       
   936   // the args.  We  finally end in a jump to the generic interpreter entry point.
       
   937   // On exit from the interpreter, the interpreter will restore our SP (lest the
       
    938   // compiled code, which relies solely on SP and not RBP, get sick).
       
   939 
       
   940   address c2i_unverified_entry = __ pc();
       
   941   Label skip_fixup;
       
   942   Label ok;
       
   943 
       
   944   Register holder = rax;
       
   945   Register receiver = j_rarg0;
       
   946   Register temp = rbx;
       
   947 
       
   948   {
       
   949     __ load_klass(temp, receiver);
       
   950     __ cmpptr(temp, Address(holder, CompiledICHolder::holder_klass_offset()));
       
   951     __ movptr(rbx, Address(holder, CompiledICHolder::holder_method_offset()));
       
   952     __ jcc(Assembler::equal, ok);
       
   953     __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
       
   954 
       
   955     __ bind(ok);
       
   956     // Method might have been compiled since the call site was patched to
       
    957     // interpreted; if that is the case, treat it as a miss so we can get
       
   958     // the call site corrected.
       
   959     __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
       
   960     __ jcc(Assembler::equal, skip_fixup);
       
   961     __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
       
   962   }
       
   963 
       
   964   address c2i_entry = __ pc();
       
   965 
       
   966   gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
       
   967 
       
   968   __ flush();
       
   969   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
       
   970 }
       
   971 
       
   972 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
       
   973                                          VMRegPair *regs,
       
   974                                          VMRegPair *regs2,
       
   975                                          int total_args_passed) {
       
   976   assert(regs2 == NULL, "not needed on x86");
       
    977 // We return the number of VMRegImpl stack slots we need to reserve for all
       
   978 // the arguments NOT counting out_preserve_stack_slots.
       
   979 
       
   980 // NOTE: These arrays will have to change when c1 is ported
       
   981 #ifdef _WIN64
       
   982     static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
       
   983       c_rarg0, c_rarg1, c_rarg2, c_rarg3
       
   984     };
       
   985     static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
       
   986       c_farg0, c_farg1, c_farg2, c_farg3
       
   987     };
       
   988 #else
       
   989     static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
       
   990       c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5
       
   991     };
       
   992     static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
       
   993       c_farg0, c_farg1, c_farg2, c_farg3,
       
   994       c_farg4, c_farg5, c_farg6, c_farg7
       
   995     };
       
   996 #endif // _WIN64
       
   997 
       
   998 
       
   999     uint int_args = 0;
       
  1000     uint fp_args = 0;
       
  1001     uint stk_args = 0; // inc by 2 each time
       
  1002 
       
  1003     for (int i = 0; i < total_args_passed; i++) {
       
  1004       switch (sig_bt[i]) {
       
  1005       case T_BOOLEAN:
       
  1006       case T_CHAR:
       
  1007       case T_BYTE:
       
  1008       case T_SHORT:
       
  1009       case T_INT:
       
  1010         if (int_args < Argument::n_int_register_parameters_c) {
       
  1011           regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
       
  1012 #ifdef _WIN64
       
  1013           fp_args++;
       
   1014           // Allocate slots for the callee to stuff register args on the stack.
       
  1015           stk_args += 2;
       
  1016 #endif
       
  1017         } else {
       
  1018           regs[i].set1(VMRegImpl::stack2reg(stk_args));
       
  1019           stk_args += 2;
       
  1020         }
       
  1021         break;
       
  1022       case T_LONG:
       
  1023         assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
       
  1024         // fall through
       
  1025       case T_OBJECT:
       
  1026       case T_ARRAY:
       
  1027       case T_ADDRESS:
       
  1028       case T_METADATA:
       
  1029         if (int_args < Argument::n_int_register_parameters_c) {
       
  1030           regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
       
  1031 #ifdef _WIN64
       
  1032           fp_args++;
       
  1033           stk_args += 2;
       
  1034 #endif
       
  1035         } else {
       
  1036           regs[i].set2(VMRegImpl::stack2reg(stk_args));
       
  1037           stk_args += 2;
       
  1038         }
       
  1039         break;
       
  1040       case T_FLOAT:
       
  1041         if (fp_args < Argument::n_float_register_parameters_c) {
       
  1042           regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
       
  1043 #ifdef _WIN64
       
  1044           int_args++;
       
   1045           // Allocate slots for the callee to stuff register args on the stack.
       
  1046           stk_args += 2;
       
  1047 #endif
       
  1048         } else {
       
  1049           regs[i].set1(VMRegImpl::stack2reg(stk_args));
       
  1050           stk_args += 2;
       
  1051         }
       
  1052         break;
       
  1053       case T_DOUBLE:
       
  1054         assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
       
  1055         if (fp_args < Argument::n_float_register_parameters_c) {
       
  1056           regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
       
  1057 #ifdef _WIN64
       
  1058           int_args++;
       
   1059           // Allocate slots for the callee to stuff register args on the stack.
       
  1060           stk_args += 2;
       
  1061 #endif
       
  1062         } else {
       
  1063           regs[i].set2(VMRegImpl::stack2reg(stk_args));
       
  1064           stk_args += 2;
       
  1065         }
       
  1066         break;
       
  1067       case T_VOID: // Halves of longs and doubles
       
  1068         assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
       
  1069         regs[i].set_bad();
       
  1070         break;
       
  1071       default:
       
  1072         ShouldNotReachHere();
       
  1073         break;
       
  1074       }
       
  1075     }
       
  1076 #ifdef _WIN64
       
   1077   // The Windows ABI requires that we always allocate enough stack space
       
   1078   // for four 64-bit registers to be stored down (the 32-byte shadow area).
       
  1079   if (stk_args < 8) {
       
  1080     stk_args = 8;
       
  1081   }
       
  1082 #endif // _WIN64
       
  1083 
       
  1084   return stk_args;
       
  1085 }
       
  1086 
       
   1087 // On 64 bit we will store integer-like items to the stack as full
        
   1088 // 64-bit items (as in the sparc abi) even though Java would only store
        
   1089 // 32 bits for a parameter. On 32 bit it would simply be 32 bits,
        
   1090 // so this routine does a 32->32 move on 32 bit and a 32->64 move on 64 bit.
       
  1091 static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
       
  1092   if (src.first()->is_stack()) {
       
  1093     if (dst.first()->is_stack()) {
       
  1094       // stack to stack
       
  1095       __ movslq(rax, Address(rbp, reg2offset_in(src.first())));
       
  1096       __ movq(Address(rsp, reg2offset_out(dst.first())), rax);
       
  1097     } else {
       
  1098       // stack to reg
       
  1099       __ movslq(dst.first()->as_Register(), Address(rbp, reg2offset_in(src.first())));
       
  1100     }
       
  1101   } else if (dst.first()->is_stack()) {
       
  1102     // reg to stack
       
  1103     // Do we really have to sign extend???
       
  1104     // __ movslq(src.first()->as_Register(), src.first()->as_Register());
       
  1105     __ movq(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register());
       
  1106   } else {
       
  1107     // Do we really have to sign extend???
       
  1108     // __ movslq(dst.first()->as_Register(), src.first()->as_Register());
       
  1109     if (dst.first() != src.first()) {
       
  1110       __ movq(dst.first()->as_Register(), src.first()->as_Register());
       
  1111     }
       
  1112   }
       
  1113 }
       
  1114 
       
  1115 static void move_ptr(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
       
  1116   if (src.first()->is_stack()) {
       
  1117     if (dst.first()->is_stack()) {
       
  1118       // stack to stack
       
  1119       __ movq(rax, Address(rbp, reg2offset_in(src.first())));
       
  1120       __ movq(Address(rsp, reg2offset_out(dst.first())), rax);
       
  1121     } else {
       
  1122       // stack to reg
       
  1123       __ movq(dst.first()->as_Register(), Address(rbp, reg2offset_in(src.first())));
       
  1124     }
       
  1125   } else if (dst.first()->is_stack()) {
       
  1126     // reg to stack
       
  1127     __ movq(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register());
       
  1128   } else {
       
  1129     if (dst.first() != src.first()) {
       
  1130       __ movq(dst.first()->as_Register(), src.first()->as_Register());
       
  1131     }
       
  1132   }
       
  1133 }
       
  1134 
       
   1135 // An oop arg. We must pass a handle, not the oop itself.
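        // Here a "handle" is simply the address of a stack slot that holds the oop:
        // either the caller's argument slot or a slot in the reserved oop handle area.
        // That slot is recorded in the OopMap, and a NULL oop is passed as a NULL
        // handle rather than as a pointer to a NULL slot.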
       
  1136 static void object_move(MacroAssembler* masm,
       
  1137                         OopMap* map,
       
  1138                         int oop_handle_offset,
       
  1139                         int framesize_in_slots,
       
  1140                         VMRegPair src,
       
  1141                         VMRegPair dst,
       
  1142                         bool is_receiver,
       
  1143                         int* receiver_offset) {
       
  1144 
       
  1145   // must pass a handle. First figure out the location we use as a handle
       
  1146 
       
  1147   Register rHandle = dst.first()->is_stack() ? rax : dst.first()->as_Register();
       
  1148 
       
   1149   // See if the oop is NULL; if it is we need no handle
       
  1150 
       
  1151   if (src.first()->is_stack()) {
       
  1152 
       
  1153     // Oop is already on the stack as an argument
       
  1154     int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
       
  1155     map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
       
  1156     if (is_receiver) {
       
  1157       *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
       
  1158     }
       
  1159 
       
  1160     __ cmpptr(Address(rbp, reg2offset_in(src.first())), (int32_t)NULL_WORD);
       
  1161     __ lea(rHandle, Address(rbp, reg2offset_in(src.first())));
       
  1162     // conditionally move a NULL
       
  1163     __ cmovptr(Assembler::equal, rHandle, Address(rbp, reg2offset_in(src.first())));
       
  1164   } else {
       
  1165 
       
   1166     // Oop is in a register; we must store it to the space we reserve
        
   1167     // on the stack for oop handles and pass a handle if the oop is non-NULL
       
  1168 
       
  1169     const Register rOop = src.first()->as_Register();
       
  1170     int oop_slot;
       
  1171     if (rOop == j_rarg0)
       
  1172       oop_slot = 0;
       
  1173     else if (rOop == j_rarg1)
       
  1174       oop_slot = 1;
       
  1175     else if (rOop == j_rarg2)
       
  1176       oop_slot = 2;
       
  1177     else if (rOop == j_rarg3)
       
  1178       oop_slot = 3;
       
  1179     else if (rOop == j_rarg4)
       
  1180       oop_slot = 4;
       
  1181     else {
       
  1182       assert(rOop == j_rarg5, "wrong register");
       
  1183       oop_slot = 5;
       
  1184     }
       
  1185 
       
  1186     oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset;
       
  1187     int offset = oop_slot*VMRegImpl::stack_slot_size;
       
  1188 
       
  1189     map->set_oop(VMRegImpl::stack2reg(oop_slot));
       
  1190     // Store oop in handle area, may be NULL
       
  1191     __ movptr(Address(rsp, offset), rOop);
       
  1192     if (is_receiver) {
       
  1193       *receiver_offset = offset;
       
  1194     }
       
  1195 
       
  1196     __ cmpptr(rOop, (int32_t)NULL_WORD);
       
  1197     __ lea(rHandle, Address(rsp, offset));
       
  1198     // conditionally move a NULL from the handle area where it was just stored
       
  1199     __ cmovptr(Assembler::equal, rHandle, Address(rsp, offset));
       
  1200   }
       
  1201 
       
   1202   // If the arg is on the stack then place it there, otherwise it is already in the correct reg.
       
  1203   if (dst.first()->is_stack()) {
       
  1204     __ movptr(Address(rsp, reg2offset_out(dst.first())), rHandle);
       
  1205   }
       
  1206 }
       
  1207 
       
   1208 // A float arg may have to do a float reg to int reg conversion
       
  1209 static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
       
  1210   assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move");
       
  1211 
       
   1212   // The calling convention assures us that each VMRegPair is either
        
   1213   // all really one physical register or adjacent stack slots.
        
   1214   // This greatly simplifies the cases here compared to sparc.
       
  1215 
       
  1216   if (src.first()->is_stack()) {
       
  1217     if (dst.first()->is_stack()) {
       
  1218       __ movl(rax, Address(rbp, reg2offset_in(src.first())));
       
  1219       __ movptr(Address(rsp, reg2offset_out(dst.first())), rax);
       
  1220     } else {
       
  1221       // stack to reg
       
  1222       assert(dst.first()->is_XMMRegister(), "only expect xmm registers as parameters");
       
  1223       __ movflt(dst.first()->as_XMMRegister(), Address(rbp, reg2offset_in(src.first())));
       
  1224     }
       
  1225   } else if (dst.first()->is_stack()) {
       
  1226     // reg to stack
       
  1227     assert(src.first()->is_XMMRegister(), "only expect xmm registers as parameters");
       
  1228     __ movflt(Address(rsp, reg2offset_out(dst.first())), src.first()->as_XMMRegister());
       
  1229   } else {
       
  1230     // reg to reg
       
  1231     // In theory these overlap but the ordering is such that this is likely a nop
       
  1232     if ( src.first() != dst.first()) {
       
  1233       __ movdbl(dst.first()->as_XMMRegister(),  src.first()->as_XMMRegister());
       
  1234     }
       
  1235   }
       
  1236 }
       
  1237 
       
  1238 // A long move
       
  1239 static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
       
  1240 
       
   1241   // The calling convention assures us that each VMRegPair is either
        
   1242   // all really one physical register or adjacent stack slots.
        
   1243   // This greatly simplifies the cases here compared to sparc.
       
  1244 
       
  1245   if (src.is_single_phys_reg() ) {
       
  1246     if (dst.is_single_phys_reg()) {
       
  1247       if (dst.first() != src.first()) {
       
  1248         __ mov(dst.first()->as_Register(), src.first()->as_Register());
       
  1249       }
       
  1250     } else {
       
  1251       assert(dst.is_single_reg(), "not a stack pair");
       
  1252       __ movq(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register());
       
  1253     }
       
  1254   } else if (dst.is_single_phys_reg()) {
       
  1255     assert(src.is_single_reg(),  "not a stack pair");
       
  1256     __ movq(dst.first()->as_Register(), Address(rbp, reg2offset_out(src.first())));
       
  1257   } else {
       
  1258     assert(src.is_single_reg() && dst.is_single_reg(), "not stack pairs");
       
  1259     __ movq(rax, Address(rbp, reg2offset_in(src.first())));
       
  1260     __ movq(Address(rsp, reg2offset_out(dst.first())), rax);
       
  1261   }
       
  1262 }
       
  1263 
       
  1264 // A double move
       
  1265 static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
       
  1266 
       
   1267   // The calling convention assures us that each VMRegPair is either
        
   1268   // all really one physical register or adjacent stack slots.
        
   1269   // This greatly simplifies the cases here compared to sparc.
       
  1270 
       
  1271   if (src.is_single_phys_reg() ) {
       
  1272     if (dst.is_single_phys_reg()) {
       
  1273       // In theory these overlap but the ordering is such that this is likely a nop
       
  1274       if ( src.first() != dst.first()) {
       
  1275         __ movdbl(dst.first()->as_XMMRegister(), src.first()->as_XMMRegister());
       
  1276       }
       
  1277     } else {
       
  1278       assert(dst.is_single_reg(), "not a stack pair");
       
  1279       __ movdbl(Address(rsp, reg2offset_out(dst.first())), src.first()->as_XMMRegister());
       
  1280     }
       
  1281   } else if (dst.is_single_phys_reg()) {
       
  1282     assert(src.is_single_reg(),  "not a stack pair");
       
  1283     __ movdbl(dst.first()->as_XMMRegister(), Address(rbp, reg2offset_out(src.first())));
       
  1284   } else {
       
  1285     assert(src.is_single_reg() && dst.is_single_reg(), "not stack pairs");
       
  1286     __ movq(rax, Address(rbp, reg2offset_in(src.first())));
       
  1287     __ movq(Address(rsp, reg2offset_out(dst.first())), rax);
       
  1288   }
       
  1289 }
       
  1290 
       
  1291 
       
  1292 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
       
   1293   // We always ignore the frame_slots arg and just use the space just below the frame pointer,
       
  1294   // which by this time is free to use
       
  1295   switch (ret_type) {
       
  1296   case T_FLOAT:
       
  1297     __ movflt(Address(rbp, -wordSize), xmm0);
       
  1298     break;
       
  1299   case T_DOUBLE:
       
  1300     __ movdbl(Address(rbp, -wordSize), xmm0);
       
  1301     break;
       
  1302   case T_VOID:  break;
       
  1303   default: {
       
  1304     __ movptr(Address(rbp, -wordSize), rax);
       
  1305     }
       
  1306   }
       
  1307 }
       
  1308 
       
  1309 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
       
   1310   // We always ignore the frame_slots arg and just use the space just below the frame pointer,
       
  1311   // which by this time is free to use
       
  1312   switch (ret_type) {
       
  1313   case T_FLOAT:
       
  1314     __ movflt(xmm0, Address(rbp, -wordSize));
       
  1315     break;
       
  1316   case T_DOUBLE:
       
  1317     __ movdbl(xmm0, Address(rbp, -wordSize));
       
  1318     break;
       
  1319   case T_VOID:  break;
       
  1320   default: {
       
  1321     __ movptr(rax, Address(rbp, -wordSize));
       
  1322     }
       
  1323   }
       
  1324 }
       
  1325 
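        // save_args/restore_args spill any register arguments (general purpose
        // registers via push, XMM registers via a two-word scratch area) around a
        // call that would otherwise clobber them, and reload them in reverse order.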
       
  1326 static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
       
  1327     for ( int i = first_arg ; i < arg_count ; i++ ) {
       
  1328       if (args[i].first()->is_Register()) {
       
  1329         __ push(args[i].first()->as_Register());
       
  1330       } else if (args[i].first()->is_XMMRegister()) {
       
  1331         __ subptr(rsp, 2*wordSize);
       
  1332         __ movdbl(Address(rsp, 0), args[i].first()->as_XMMRegister());
       
  1333       }
       
  1334     }
       
  1335 }
       
  1336 
       
  1337 static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
       
  1338     for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) {
       
  1339       if (args[i].first()->is_Register()) {
       
  1340         __ pop(args[i].first()->as_Register());
       
  1341       } else if (args[i].first()->is_XMMRegister()) {
       
  1342         __ movdbl(args[i].first()->as_XMMRegister(), Address(rsp, 0));
       
  1343         __ addptr(rsp, 2*wordSize);
       
  1344       }
       
  1345     }
       
  1346 }
       
  1347 
       
  1348 
       
  1349 static void save_or_restore_arguments(MacroAssembler* masm,
       
  1350                                       const int stack_slots,
       
  1351                                       const int total_in_args,
       
  1352                                       const int arg_save_area,
       
  1353                                       OopMap* map,
       
  1354                                       VMRegPair* in_regs,
       
  1355                                       BasicType* in_sig_bt) {
       
  1356   // if map is non-NULL then the code should store the values,
       
  1357   // otherwise it should load them.
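          // This is used on either side of the call to block_for_jni_critical below:
          // first with a map to spill the argument registers and record any oops,
          // then with a NULL map to reload everything once the runtime call returns.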
       
  1358   int slot = arg_save_area;
       
  1359   // Save down double word first
       
  1360   for ( int i = 0; i < total_in_args; i++) {
       
  1361     if (in_regs[i].first()->is_XMMRegister() && in_sig_bt[i] == T_DOUBLE) {
       
  1362       int offset = slot * VMRegImpl::stack_slot_size;
       
  1363       slot += VMRegImpl::slots_per_word;
       
  1364       assert(slot <= stack_slots, "overflow");
       
  1365       if (map != NULL) {
       
  1366         __ movdbl(Address(rsp, offset), in_regs[i].first()->as_XMMRegister());
       
  1367       } else {
       
  1368         __ movdbl(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset));
       
  1369       }
       
  1370     }
       
  1371     if (in_regs[i].first()->is_Register() &&
       
  1372         (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_ARRAY)) {
       
  1373       int offset = slot * VMRegImpl::stack_slot_size;
       
  1374       if (map != NULL) {
       
  1375         __ movq(Address(rsp, offset), in_regs[i].first()->as_Register());
       
  1376         if (in_sig_bt[i] == T_ARRAY) {
       
   1377           map->set_oop(VMRegImpl::stack2reg(slot));
       
  1378         }
       
  1379       } else {
       
  1380         __ movq(in_regs[i].first()->as_Register(), Address(rsp, offset));
       
  1381       }
       
  1382       slot += VMRegImpl::slots_per_word;
       
  1383     }
       
  1384   }
       
  1385   // Save or restore single word registers
       
  1386   for ( int i = 0; i < total_in_args; i++) {
       
  1387     if (in_regs[i].first()->is_Register()) {
       
  1388       int offset = slot * VMRegImpl::stack_slot_size;
       
  1389       slot++;
       
  1390       assert(slot <= stack_slots, "overflow");
       
  1391 
       
   1392       // Value is in an input register, so we must flush it to the stack
       
  1393       const Register reg = in_regs[i].first()->as_Register();
       
  1394       switch (in_sig_bt[i]) {
       
  1395         case T_BOOLEAN:
       
  1396         case T_CHAR:
       
  1397         case T_BYTE:
       
  1398         case T_SHORT:
       
  1399         case T_INT:
       
  1400           if (map != NULL) {
       
  1401             __ movl(Address(rsp, offset), reg);
       
  1402           } else {
       
  1403             __ movl(reg, Address(rsp, offset));
       
  1404           }
       
  1405           break;
       
  1406         case T_ARRAY:
       
  1407         case T_LONG:
       
  1408           // handled above
       
  1409           break;
       
  1410         case T_OBJECT:
       
  1411         default: ShouldNotReachHere();
       
  1412       }
       
  1413     } else if (in_regs[i].first()->is_XMMRegister()) {
       
  1414       if (in_sig_bt[i] == T_FLOAT) {
       
  1415         int offset = slot * VMRegImpl::stack_slot_size;
       
  1416         slot++;
       
  1417         assert(slot <= stack_slots, "overflow");
       
  1418         if (map != NULL) {
       
  1419           __ movflt(Address(rsp, offset), in_regs[i].first()->as_XMMRegister());
       
  1420         } else {
       
  1421           __ movflt(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset));
       
  1422         }
       
  1423       }
       
  1424     } else if (in_regs[i].first()->is_stack()) {
       
  1425       if (in_sig_bt[i] == T_ARRAY && map != NULL) {
       
  1426         int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
       
  1427         map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
       
  1428       }
       
  1429     }
       
  1430   }
       
  1431 }
       
  1432 
       
  1433 
       
  1434 // Check GCLocker::needs_gc and enter the runtime if it's true.  This
       
  1435 // keeps a new JNI critical region from starting until a GC has been
       
  1436 // forced.  Save down any oops in registers and describe them in an
       
  1437 // OopMap.
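        // The fast path is a single byte compare against GCLocker::needs_gc(); only
        // when a GC is pending do we spill the incoming arguments and call
        // SharedRuntime::block_for_jni_critical().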
       
  1438 static void check_needs_gc_for_critical_native(MacroAssembler* masm,
       
  1439                                                int stack_slots,
       
  1440                                                int total_c_args,
       
  1441                                                int total_in_args,
       
  1442                                                int arg_save_area,
       
  1443                                                OopMapSet* oop_maps,
       
  1444                                                VMRegPair* in_regs,
       
  1445                                                BasicType* in_sig_bt) {
       
  1446   __ block_comment("check GCLocker::needs_gc");
       
  1447   Label cont;
       
  1448   __ cmp8(ExternalAddress((address)GCLocker::needs_gc_address()), false);
       
  1449   __ jcc(Assembler::equal, cont);
       
  1450 
       
  1451   // Save down any incoming oops and call into the runtime to halt for a GC
       
  1452 
       
  1453   OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
       
  1454   save_or_restore_arguments(masm, stack_slots, total_in_args,
       
  1455                             arg_save_area, map, in_regs, in_sig_bt);
       
  1456 
       
  1457   address the_pc = __ pc();
       
  1458   oop_maps->add_gc_map( __ offset(), map);
       
  1459   __ set_last_Java_frame(rsp, noreg, the_pc);
       
  1460 
       
  1461   __ block_comment("block_for_jni_critical");
       
  1462   __ movptr(c_rarg0, r15_thread);
       
  1463   __ mov(r12, rsp); // remember sp
       
  1464   __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
       
  1465   __ andptr(rsp, -16); // align stack as required by ABI
       
  1466   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical)));
       
  1467   __ mov(rsp, r12); // restore sp
       
  1468   __ reinit_heapbase();
       
  1469 
       
  1470   __ reset_last_Java_frame(false);
       
  1471 
       
  1472   save_or_restore_arguments(masm, stack_slots, total_in_args,
       
  1473                             arg_save_area, NULL, in_regs, in_sig_bt);
       
  1474   __ bind(cont);
       
  1475 #ifdef ASSERT
       
  1476   if (StressCriticalJNINatives) {
       
  1477     // Stress register saving
       
  1478     OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
       
  1479     save_or_restore_arguments(masm, stack_slots, total_in_args,
       
  1480                               arg_save_area, map, in_regs, in_sig_bt);
       
  1481     // Destroy argument registers
       
  1482     for (int i = 0; i < total_in_args - 1; i++) {
       
  1483       if (in_regs[i].first()->is_Register()) {
       
  1484         const Register reg = in_regs[i].first()->as_Register();
       
  1485         __ xorptr(reg, reg);
       
  1486       } else if (in_regs[i].first()->is_XMMRegister()) {
       
  1487         __ xorpd(in_regs[i].first()->as_XMMRegister(), in_regs[i].first()->as_XMMRegister());
       
  1488       } else if (in_regs[i].first()->is_FloatRegister()) {
       
  1489         ShouldNotReachHere();
       
  1490       } else if (in_regs[i].first()->is_stack()) {
       
  1491         // Nothing to do
       
  1492       } else {
       
  1493         ShouldNotReachHere();
       
  1494       }
       
  1495       if (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_DOUBLE) {
       
  1496         i++;
       
  1497       }
       
  1498     }
       
  1499 
       
  1500     save_or_restore_arguments(masm, stack_slots, total_in_args,
       
  1501                               arg_save_area, NULL, in_regs, in_sig_bt);
       
  1502   }
       
  1503 #endif
       
  1504 }
       
  1505 
       
  1506 // Unpack an array argument into a pointer to the body and the length
       
  1507 // if the array is non-null, otherwise pass 0 for both.
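        // For example, a critical native declared to take a jintArray receives a
        // (jint length, jint* body) pair in its place; a NULL array is passed as
        // (0, NULL).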
       
  1508 static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) {
       
  1509   Register tmp_reg = rax;
       
  1510   assert(!body_arg.first()->is_Register() || body_arg.first()->as_Register() != tmp_reg,
       
  1511          "possible collision");
       
  1512   assert(!length_arg.first()->is_Register() || length_arg.first()->as_Register() != tmp_reg,
       
  1513          "possible collision");
       
  1514 
       
  1515   __ block_comment("unpack_array_argument {");
       
  1516 
       
  1517   // Pass the length, ptr pair
       
  1518   Label is_null, done;
       
  1519   VMRegPair tmp;
       
  1520   tmp.set_ptr(tmp_reg->as_VMReg());
       
  1521   if (reg.first()->is_stack()) {
       
  1522     // Load the arg up from the stack
       
  1523     move_ptr(masm, reg, tmp);
       
  1524     reg = tmp;
       
  1525   }
       
  1526   __ testptr(reg.first()->as_Register(), reg.first()->as_Register());
       
  1527   __ jccb(Assembler::equal, is_null);
       
  1528   __ lea(tmp_reg, Address(reg.first()->as_Register(), arrayOopDesc::base_offset_in_bytes(in_elem_type)));
       
  1529   move_ptr(masm, tmp, body_arg);
       
  1530   // load the length relative to the body.
       
  1531   __ movl(tmp_reg, Address(tmp_reg, arrayOopDesc::length_offset_in_bytes() -
       
  1532                            arrayOopDesc::base_offset_in_bytes(in_elem_type)));
       
  1533   move32_64(masm, tmp, length_arg);
       
  1534   __ jmpb(done);
       
  1535   __ bind(is_null);
       
  1536   // Pass zeros
       
  1537   __ xorptr(tmp_reg, tmp_reg);
       
  1538   move_ptr(masm, tmp, body_arg);
       
  1539   move32_64(masm, tmp, length_arg);
       
  1540   __ bind(done);
       
  1541 
       
  1542   __ block_comment("} unpack_array_argument");
       
  1543 }
       
  1544 
       
  1545 
       
  1546 // Different signatures may require very different orders for the move
       
  1547 // to avoid clobbering other arguments.  There's no simple way to
       
  1548 // order them safely.  Compute a safe order for issuing stores and
       
  1549 // break any cycles in those stores.  This code is fairly general but
       
  1550 // it's not necessary on the other platforms so we keep it in the
       
  1551 // platform dependent code instead of moving it into a shared file.
       
  1552 // (See bugs 7013347 & 7145024.)
       
  1553 // Note that this code is specific to LP64.
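        // For example, if one argument must move rdi -> rsi while another must move
        // rsi -> rdi, neither store can safely go first. break_cycle() redirects the
        // store that closes the cycle into the temporary register (the wrapper below
        // passes rbx as the temp) and appends a final store from the temp to the
        // real destination.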
       
  1554 class ComputeMoveOrder: public StackObj {
       
  1555   class MoveOperation: public ResourceObj {
       
  1556     friend class ComputeMoveOrder;
       
  1557    private:
       
  1558     VMRegPair        _src;
       
  1559     VMRegPair        _dst;
       
  1560     int              _src_index;
       
  1561     int              _dst_index;
       
  1562     bool             _processed;
       
  1563     MoveOperation*  _next;
       
  1564     MoveOperation*  _prev;
       
  1565 
       
  1566     static int get_id(VMRegPair r) {
       
  1567       return r.first()->value();
       
  1568     }
       
  1569 
       
  1570    public:
       
  1571     MoveOperation(int src_index, VMRegPair src, int dst_index, VMRegPair dst):
       
  1572       _src(src)
       
  1573     , _src_index(src_index)
       
  1574     , _dst(dst)
       
  1575     , _dst_index(dst_index)
       
  1576     , _next(NULL)
       
  1577     , _prev(NULL)
       
  1578     , _processed(false) {
       
  1579     }
       
  1580 
       
  1581     VMRegPair src() const              { return _src; }
       
  1582     int src_id() const                 { return get_id(src()); }
       
  1583     int src_index() const              { return _src_index; }
       
  1584     VMRegPair dst() const              { return _dst; }
       
  1585     void set_dst(int i, VMRegPair dst) { _dst_index = i, _dst = dst; }
       
  1586     int dst_index() const              { return _dst_index; }
       
  1587     int dst_id() const                 { return get_id(dst()); }
       
  1588     MoveOperation* next() const       { return _next; }
       
  1589     MoveOperation* prev() const       { return _prev; }
       
  1590     void set_processed()               { _processed = true; }
       
  1591     bool is_processed() const          { return _processed; }
       
  1592 
       
  1593     // insert
       
  1594     void break_cycle(VMRegPair temp_register) {
       
  1595       // create a new store following the last store
       
  1596       // to move from the temp_register to the original
       
  1597       MoveOperation* new_store = new MoveOperation(-1, temp_register, dst_index(), dst());
       
  1598 
       
  1599       // break the cycle of links and insert new_store at the end
       
  1600       // break the reverse link.
       
  1601       MoveOperation* p = prev();
       
  1602       assert(p->next() == this, "must be");
       
  1603       _prev = NULL;
       
  1604       p->_next = new_store;
       
  1605       new_store->_prev = p;
       
  1606 
       
   1607       // change the original store to save its value in the temp.
       
  1608       set_dst(-1, temp_register);
       
  1609     }
       
  1610 
       
  1611     void link(GrowableArray<MoveOperation*>& killer) {
       
   1612       // link this store in front of the store that it depends on
       
  1613       MoveOperation* n = killer.at_grow(src_id(), NULL);
       
  1614       if (n != NULL) {
       
  1615         assert(_next == NULL && n->_prev == NULL, "shouldn't have been set yet");
       
  1616         _next = n;
       
  1617         n->_prev = this;
       
  1618       }
       
  1619     }
       
  1620   };
       
  1621 
       
  1622  private:
       
  1623   GrowableArray<MoveOperation*> edges;
       
  1624 
       
  1625  public:
       
  1626   ComputeMoveOrder(int total_in_args, VMRegPair* in_regs, int total_c_args, VMRegPair* out_regs,
       
  1627                     BasicType* in_sig_bt, GrowableArray<int>& arg_order, VMRegPair tmp_vmreg) {
       
  1628     // Move operations where the dest is the stack can all be
       
  1629     // scheduled first since they can't interfere with the other moves.
       
  1630     for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) {
       
  1631       if (in_sig_bt[i] == T_ARRAY) {
       
  1632         c_arg--;
       
  1633         if (out_regs[c_arg].first()->is_stack() &&
       
  1634             out_regs[c_arg + 1].first()->is_stack()) {
       
  1635           arg_order.push(i);
       
  1636           arg_order.push(c_arg);
       
  1637         } else {
       
  1638           if (out_regs[c_arg].first()->is_stack() ||
       
  1639               in_regs[i].first() == out_regs[c_arg].first()) {
       
  1640             add_edge(i, in_regs[i].first(), c_arg, out_regs[c_arg + 1]);
       
  1641           } else {
       
  1642             add_edge(i, in_regs[i].first(), c_arg, out_regs[c_arg]);
       
  1643           }
       
  1644         }
       
  1645       } else if (in_sig_bt[i] == T_VOID) {
       
  1646         arg_order.push(i);
       
  1647         arg_order.push(c_arg);
       
  1648       } else {
       
  1649         if (out_regs[c_arg].first()->is_stack() ||
       
  1650             in_regs[i].first() == out_regs[c_arg].first()) {
       
  1651           arg_order.push(i);
       
  1652           arg_order.push(c_arg);
       
  1653         } else {
       
  1654           add_edge(i, in_regs[i].first(), c_arg, out_regs[c_arg]);
       
  1655         }
       
  1656       }
       
  1657     }
       
   1658     // Break any cycles in the register moves and emit them in the
        
   1659     // proper order.
       
  1660     GrowableArray<MoveOperation*>* stores = get_store_order(tmp_vmreg);
       
  1661     for (int i = 0; i < stores->length(); i++) {
       
  1662       arg_order.push(stores->at(i)->src_index());
       
  1663       arg_order.push(stores->at(i)->dst_index());
       
  1664     }
       
  1665  }
       
  1666 
       
   1667   // Collect all the move operations
       
  1668   void add_edge(int src_index, VMRegPair src, int dst_index, VMRegPair dst) {
       
  1669     if (src.first() == dst.first()) return;
       
  1670     edges.append(new MoveOperation(src_index, src, dst_index, dst));
       
  1671   }
       
  1672 
       
  1673   // Walk the edges breaking cycles between moves.  The result list
       
  1674   // can be walked in order to produce the proper set of loads
       
  1675   GrowableArray<MoveOperation*>* get_store_order(VMRegPair temp_register) {
       
  1676     // Record which moves kill which values
       
  1677     GrowableArray<MoveOperation*> killer;
       
  1678     for (int i = 0; i < edges.length(); i++) {
       
  1679       MoveOperation* s = edges.at(i);
       
  1680       assert(killer.at_grow(s->dst_id(), NULL) == NULL, "only one killer");
       
  1681       killer.at_put_grow(s->dst_id(), s, NULL);
       
  1682     }
       
  1683     assert(killer.at_grow(MoveOperation::get_id(temp_register), NULL) == NULL,
       
  1684            "make sure temp isn't in the registers that are killed");
       
  1685 
       
  1686     // create links between loads and stores
       
  1687     for (int i = 0; i < edges.length(); i++) {
       
  1688       edges.at(i)->link(killer);
       
  1689     }
       
  1690 
       
  1691     // at this point, all the move operations are chained together
       
  1692     // in a doubly linked list.  Processing it backwards finds
       
  1693     // the beginning of the chain, forwards finds the end.  If there's
       
  1694     // a cycle it can be broken at any point,  so pick an edge and walk
       
  1695     // backward until the list ends or we end where we started.
       
  1696     GrowableArray<MoveOperation*>* stores = new GrowableArray<MoveOperation*>();
       
  1697     for (int e = 0; e < edges.length(); e++) {
       
  1698       MoveOperation* s = edges.at(e);
       
  1699       if (!s->is_processed()) {
       
  1700         MoveOperation* start = s;
       
  1701         // search for the beginning of the chain or cycle
       
  1702         while (start->prev() != NULL && start->prev() != s) {
       
  1703           start = start->prev();
       
  1704         }
       
  1705         if (start->prev() == s) {
       
  1706           start->break_cycle(temp_register);
       
  1707         }
       
  1708         // walk the chain forward inserting to store list
       
  1709         while (start != NULL) {
       
  1710           stores->append(start);
       
  1711           start->set_processed();
       
  1712           start = start->next();
       
  1713         }
       
  1714       }
       
  1715     }
       
  1716     return stores;
       
  1717   }
       
  1718 };
       
  1719 
       
  1720 static void verify_oop_args(MacroAssembler* masm,
       
  1721                             const methodHandle& method,
       
  1722                             const BasicType* sig_bt,
       
  1723                             const VMRegPair* regs) {
       
  1724   Register temp_reg = rbx;  // not part of any compiled calling seq
       
  1725   if (VerifyOops) {
       
  1726     for (int i = 0; i < method->size_of_parameters(); i++) {
       
  1727       if (sig_bt[i] == T_OBJECT ||
       
  1728           sig_bt[i] == T_ARRAY) {
       
  1729         VMReg r = regs[i].first();
       
  1730         assert(r->is_valid(), "bad oop arg");
       
  1731         if (r->is_stack()) {
       
  1732           __ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
       
  1733           __ verify_oop(temp_reg);
       
  1734         } else {
       
  1735           __ verify_oop(r->as_Register());
       
  1736         }
       
  1737       }
       
  1738     }
       
  1739   }
       
  1740 }
       
  1741 
       
  1742 static void gen_special_dispatch(MacroAssembler* masm,
       
  1743                                  const methodHandle& method,
       
  1744                                  const BasicType* sig_bt,
       
  1745                                  const VMRegPair* regs) {
       
  1746   verify_oop_args(masm, method, sig_bt, regs);
       
  1747   vmIntrinsics::ID iid = method->intrinsic_id();
       
  1748 
       
  1749   // Now write the args into the outgoing interpreter space
       
  1750   bool     has_receiver   = false;
       
  1751   Register receiver_reg   = noreg;
       
  1752   int      member_arg_pos = -1;
       
  1753   Register member_reg     = noreg;
       
  1754   int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
       
  1755   if (ref_kind != 0) {
       
  1756     member_arg_pos = method->size_of_parameters() - 1;  // trailing MemberName argument
       
  1757     member_reg = rbx;  // known to be free at this point
       
  1758     has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
       
  1759   } else if (iid == vmIntrinsics::_invokeBasic) {
       
  1760     has_receiver = true;
       
  1761   } else {
       
  1762     fatal("unexpected intrinsic id %d", iid);
       
  1763   }
       
  1764 
       
  1765   if (member_reg != noreg) {
       
  1766     // Load the member_arg into register, if necessary.
       
  1767     SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
       
  1768     VMReg r = regs[member_arg_pos].first();
       
  1769     if (r->is_stack()) {
       
  1770       __ movptr(member_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
       
  1771     } else {
       
  1772       // no data motion is needed
       
  1773       member_reg = r->as_Register();
       
  1774     }
       
  1775   }
       
  1776 
       
  1777   if (has_receiver) {
       
  1778     // Make sure the receiver is loaded into a register.
       
  1779     assert(method->size_of_parameters() > 0, "oob");
       
  1780     assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
       
  1781     VMReg r = regs[0].first();
       
  1782     assert(r->is_valid(), "bad receiver arg");
       
  1783     if (r->is_stack()) {
       
  1784       // Porting note:  This assumes that compiled calling conventions always
       
  1785       // pass the receiver oop in a register.  If this is not true on some
       
  1786       // platform, pick a temp and load the receiver from stack.
       
  1787       fatal("receiver always in a register");
       
  1788       receiver_reg = j_rarg0;  // known to be free at this point
       
  1789       __ movptr(receiver_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
       
  1790     } else {
       
  1791       // no data motion is needed
       
  1792       receiver_reg = r->as_Register();
       
  1793     }
       
  1794   }
       
  1795 
       
  1796   // Figure out which address we are really jumping to:
       
  1797   MethodHandles::generate_method_handle_dispatch(masm, iid,
       
  1798                                                  receiver_reg, member_reg, /*for_compiler_entry:*/ true);
       
  1799 }
       
  1800 
       
  1801 // ---------------------------------------------------------------------------
       
  1802 // Generate a native wrapper for a given method.  The method takes arguments
       
  1803 // in the Java compiled code convention, marshals them to the native
       
  1804 // convention (handlizes oops, etc), transitions to native, makes the call,
       
  1805 // returns to java state (possibly blocking), unhandlizes any result and
       
  1806 // returns.
       
  1807 //
       
  1808 // Critical native functions are a shorthand for the use of
       
   1809 // GetPrimitiveArrayCritical and disallow the use of any other JNI
       
  1810 // functions.  The wrapper is expected to unpack the arguments before
       
  1811 // passing them to the callee and perform checks before and after the
       
   1812 // native call to ensure that the GCLocker
        
   1813 // lock_critical/unlock_critical semantics are followed.  Some other
        
   1814 // parts of JNI setup are skipped, like the tear down of the JNI handle
        
   1815 // block and the check for pending exceptions, since it's impossible for them
        
   1816 // to be thrown.
       
  1817 //
       
  1818 // They are roughly structured like this:
       
  1819 //    if (GCLocker::needs_gc())
       
  1820 //      SharedRuntime::block_for_jni_critical();
       
   1821 //    transition to thread_in_native
       
   1822 //    unpack array arguments and call native entry point
       
  1823 //    check for safepoint in progress
       
  1824 //    check if any thread suspend flags are set
       
   1825 //      call into the JVM and possibly unlock the JNI critical
       
  1826 //      if a GC was suppressed while in the critical native.
       
  1827 //    transition back to thread_in_Java
       
  1828 //    return to caller
       
  1829 //
       
  1830 nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
       
  1831                                                 const methodHandle& method,
       
  1832                                                 int compile_id,
       
  1833                                                 BasicType* in_sig_bt,
       
  1834                                                 VMRegPair* in_regs,
       
  1835                                                 BasicType ret_type) {
       
  1836   if (method->is_method_handle_intrinsic()) {
       
  1837     vmIntrinsics::ID iid = method->intrinsic_id();
       
  1838     intptr_t start = (intptr_t)__ pc();
       
  1839     int vep_offset = ((intptr_t)__ pc()) - start;
       
  1840     gen_special_dispatch(masm,
       
  1841                          method,
       
  1842                          in_sig_bt,
       
  1843                          in_regs);
       
  1844     int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
       
  1845     __ flush();
       
  1846     int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
       
  1847     return nmethod::new_native_nmethod(method,
       
  1848                                        compile_id,
       
  1849                                        masm->code(),
       
  1850                                        vep_offset,
       
  1851                                        frame_complete,
       
  1852                                        stack_slots / VMRegImpl::slots_per_word,
       
  1853                                        in_ByteSize(-1),
       
  1854                                        in_ByteSize(-1),
       
  1855                                        (OopMapSet*)NULL);
       
  1856   }
       
  1857   bool is_critical_native = true;
       
  1858   address native_func = method->critical_native_function();
       
  1859   if (native_func == NULL) {
       
  1860     native_func = method->native_function();
       
  1861     is_critical_native = false;
       
  1862   }
       
  1863   assert(native_func != NULL, "must have function");
       
  1864 
       
  1865   // An OopMap for lock (and class if static)
       
  1866   OopMapSet *oop_maps = new OopMapSet();
       
  1867   intptr_t start = (intptr_t)__ pc();
       
  1868 
       
   1869   // We have received a description of where all the java args are located
       
  1870   // on entry to the wrapper. We need to convert these args to where
       
  1871   // the jni function will expect them. To figure out where they go
       
  1872   // we convert the java signature to a C signature by inserting
       
  1873   // the hidden arguments as arg[0] and possibly arg[1] (static method)
       
  1874 
       
  1875   const int total_in_args = method->size_of_parameters();
       
  1876   int total_c_args = total_in_args;
       
  1877   if (!is_critical_native) {
       
  1878     total_c_args += 1;
       
  1879     if (method->is_static()) {
       
  1880       total_c_args++;
       
  1881     }
       
  1882   } else {
       
  1883     for (int i = 0; i < total_in_args; i++) {
       
  1884       if (in_sig_bt[i] == T_ARRAY) {
       
  1885         total_c_args++;
       
  1886       }
       
  1887     }
       
  1888   }
       
  1889 
       
  1890   BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
       
  1891   VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
       
  1892   BasicType* in_elem_bt = NULL;
       
  1893 
       
  1894   int argc = 0;
       
  1895   if (!is_critical_native) {
       
  1896     out_sig_bt[argc++] = T_ADDRESS;
       
  1897     if (method->is_static()) {
       
  1898       out_sig_bt[argc++] = T_OBJECT;
       
  1899     }
       
  1900 
       
  1901     for (int i = 0; i < total_in_args ; i++ ) {
       
  1902       out_sig_bt[argc++] = in_sig_bt[i];
       
  1903     }
       
  1904   } else {
       
  1905     Thread* THREAD = Thread::current();
       
  1906     in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
       
  1907     SignatureStream ss(method->signature());
       
  1908     for (int i = 0; i < total_in_args ; i++ ) {
       
  1909       if (in_sig_bt[i] == T_ARRAY) {
       
  1910         // Arrays are passed as int, elem* pair
       
  1911         out_sig_bt[argc++] = T_INT;
       
  1912         out_sig_bt[argc++] = T_ADDRESS;
       
  1913         Symbol* atype = ss.as_symbol(CHECK_NULL);
       
  1914         const char* at = atype->as_C_string();
       
  1915         if (strlen(at) == 2) {
       
  1916           assert(at[0] == '[', "must be");
       
  1917           switch (at[1]) {
       
  1918             case 'B': in_elem_bt[i]  = T_BYTE; break;
       
  1919             case 'C': in_elem_bt[i]  = T_CHAR; break;
       
  1920             case 'D': in_elem_bt[i]  = T_DOUBLE; break;
       
  1921             case 'F': in_elem_bt[i]  = T_FLOAT; break;
       
  1922             case 'I': in_elem_bt[i]  = T_INT; break;
       
  1923             case 'J': in_elem_bt[i]  = T_LONG; break;
       
  1924             case 'S': in_elem_bt[i]  = T_SHORT; break;
       
  1925             case 'Z': in_elem_bt[i]  = T_BOOLEAN; break;
       
  1926             default: ShouldNotReachHere();
       
  1927           }
       
  1928         }
       
  1929       } else {
       
  1930         out_sig_bt[argc++] = in_sig_bt[i];
       
  1931         in_elem_bt[i] = T_VOID;
       
  1932       }
       
  1933       if (in_sig_bt[i] != T_VOID) {
       
  1934         assert(in_sig_bt[i] == ss.type(), "must match");
       
  1935         ss.next();
       
  1936       }
       
  1937     }
       
  1938   }
       
  1939 
       
  1940   // Now figure out where the args must be stored and how much stack space
       
  1941   // they require.
       
  1942   int out_arg_slots;
       
  1943   out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
       
  1944 
       
  1945   // Compute framesize for the wrapper.  We need to handlize all oops in
       
  1946   // incoming registers
       
  1947 
       
  1948   // Calculate the total number of stack slots we will need.
       
  1949 
       
  1950   // First count the abi requirement plus all of the outgoing args
       
  1951   int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
       
  1952 
       
  1953   // Now the space for the inbound oop handle area
       
  1954   int total_save_slots = 6 * VMRegImpl::slots_per_word;  // 6 arguments passed in registers
       
  1955   if (is_critical_native) {
       
  1956     // Critical natives may have to call out so they need a save area
       
  1957     // for register arguments.
       
  1958     int double_slots = 0;
       
  1959     int single_slots = 0;
       
  1960     for ( int i = 0; i < total_in_args; i++) {
       
  1961       if (in_regs[i].first()->is_Register()) {
       
  1962         const Register reg = in_regs[i].first()->as_Register();
       
  1963         switch (in_sig_bt[i]) {
       
  1964           case T_BOOLEAN:
       
  1965           case T_BYTE:
       
  1966           case T_SHORT:
       
  1967           case T_CHAR:
       
  1968           case T_INT:  single_slots++; break;
       
  1969           case T_ARRAY:  // specific to LP64 (7145024)
       
  1970           case T_LONG: double_slots++; break;
       
  1971           default:  ShouldNotReachHere();
       
  1972         }
       
  1973       } else if (in_regs[i].first()->is_XMMRegister()) {
       
  1974         switch (in_sig_bt[i]) {
       
  1975           case T_FLOAT:  single_slots++; break;
       
  1976           case T_DOUBLE: double_slots++; break;
       
  1977           default:  ShouldNotReachHere();
       
  1978         }
       
  1979       } else if (in_regs[i].first()->is_FloatRegister()) {
       
  1980         ShouldNotReachHere();
       
  1981       }
       
  1982     }
       
  1983     total_save_slots = double_slots * 2 + single_slots;
       
  1984     // align the save area
       
  1985     if (double_slots != 0) {
       
  1986       stack_slots = align_up(stack_slots, 2);
       
  1987     }
       
  1988   }
       
  1989 
       
  1990   int oop_handle_offset = stack_slots;
       
  1991   stack_slots += total_save_slots;
       
  1992 
       
   1993   // Now any space we need for handlizing a klass if this is a static method
       
  1994 
       
  1995   int klass_slot_offset = 0;
       
  1996   int klass_offset = -1;
       
  1997   int lock_slot_offset = 0;
       
  1998   bool is_static = false;
       
  1999 
       
  2000   if (method->is_static()) {
       
  2001     klass_slot_offset = stack_slots;
       
  2002     stack_slots += VMRegImpl::slots_per_word;
       
  2003     klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
       
  2004     is_static = true;
       
  2005   }
       
  2006 
       
  2007   // Plus a lock if needed
       
  2008 
       
  2009   if (method->is_synchronized()) {
       
  2010     lock_slot_offset = stack_slots;
       
  2011     stack_slots += VMRegImpl::slots_per_word;
       
  2012   }
       
  2013 
       
  2014   // Now a place (+2) to save return values or temp during shuffling
       
  2015   // + 4 for return address (which we own) and saved rbp
       
  2016   stack_slots += 6;
       
  2017 
       
  2018   // Ok The space we have allocated will look like:
       
  2019   //
       
  2020   //
       
  2021   // FP-> |                     |
       
  2022   //      |---------------------|
       
  2023   //      | 2 slots for moves   |
       
  2024   //      |---------------------|
       
  2025   //      | lock box (if sync)  |
       
  2026   //      |---------------------| <- lock_slot_offset
       
  2027   //      | klass (if static)   |
       
  2028   //      |---------------------| <- klass_slot_offset
       
  2029   //      | oopHandle area      |
       
  2030   //      |---------------------| <- oop_handle_offset (6 java arg registers)
       
  2031   //      | outbound memory     |
       
  2032   //      | based arguments     |
       
  2033   //      |                     |
       
  2034   //      |---------------------|
       
  2035   //      |                     |
       
  2036   // SP-> | out_preserved_slots |
       
  2037   //
       
  2038   //
       
  2039 
       
  2040 
       
  2041   // Now compute actual number of stack words we need rounding to make
       
  2042   // stack properly aligned.
       
  2043   stack_slots = align_up(stack_slots, StackAlignmentInSlots);
       
  2044 
       
  2045   int stack_size = stack_slots * VMRegImpl::stack_slot_size;
       
  2046 
       
   2047   // First thing: make an ic check to see if we should even be here
       
  2048 
       
  2049   // We are free to use all registers as temps without saving them and
       
   2050   // restoring them, except rbp. rbp is the only callee save register
       
  2051   // as far as the interpreter and the compiler(s) are concerned.
       
  2052 
       
  2053 
       
  2054   const Register ic_reg = rax;
       
  2055   const Register receiver = j_rarg0;
       
  2056 
       
  2057   Label hit;
       
  2058   Label exception_pending;
       
  2059 
       
  2060   assert_different_registers(ic_reg, receiver, rscratch1);
       
  2061   __ verify_oop(receiver);
       
  2062   __ load_klass(rscratch1, receiver);
       
  2063   __ cmpq(ic_reg, rscratch1);
       
  2064   __ jcc(Assembler::equal, hit);
       
  2065 
       
  2066   __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
       
  2067 
       
  2068   // Verified entry point must be aligned
       
  2069   __ align(8);
       
  2070 
       
  2071   __ bind(hit);
       
  2072 
       
  2073   int vep_offset = ((intptr_t)__ pc()) - start;
       
  2074 
       
  2075 #ifdef COMPILER1
       
  2076   // For Object.hashCode, System.identityHashCode try to pull hashCode from object header if available.
       
  2077   if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) {
       
  2078     inline_check_hashcode_from_object_header(masm, method, j_rarg0 /*obj_reg*/, rax /*result*/);
       
  2079   }
       
  2080 #endif // COMPILER1
       
  2081 
       
  2082   // The instruction at the verified entry point must be 5 bytes or longer
       
  2083   // because it can be patched on the fly by make_non_entrant. The stack bang
       
  2084   // instruction fits that requirement.
       
  2085 
       
  2086   // Generate stack overflow check
       
  2087 
       
  2088   if (UseStackBanging) {
       
  2089     __ bang_stack_with_offset((int)JavaThread::stack_shadow_zone_size());
       
  2090   } else {
       
  2091     // need a 5 byte instruction to allow MT safe patching to non-entrant
       
  2092     __ fat_nop();
       
  2093   }
       
  2094 
       
  2095   // Generate a new frame for the wrapper.
       
  2096   __ enter();
       
  2097   // -2 because return address is already present and so is saved rbp
       
  2098   __ subptr(rsp, stack_size - 2*wordSize);
       
  2099 
       
  2100   // Frame is now completed as far as size and linkage.
       
  2101   int frame_complete = ((intptr_t)__ pc()) - start;
       
  2102 
       
  2103     if (UseRTMLocking) {
       
  2104       // Abort RTM transaction before calling JNI
       
  2105       // because critical section will be large and will be
       
  2106       // aborted anyway. Also nmethod could be deoptimized.
       
  2107       __ xabort(0);
       
  2108     }
       
  2109 
       
  2110 #ifdef ASSERT
       
  2111     {
       
  2112       Label L;
       
  2113       __ mov(rax, rsp);
       
  2114       __ andptr(rax, -16); // must be 16 byte boundary (see amd64 ABI)
       
  2115       __ cmpptr(rax, rsp);
       
  2116       __ jcc(Assembler::equal, L);
       
  2117       __ stop("improperly aligned stack");
       
  2118       __ bind(L);
       
  2119     }
       
  2120 #endif /* ASSERT */
       
  2121 
       
  2122 
       
  2123   // We use r14 as the oop handle for the receiver/klass
       
  2124   // It is callee save so it survives the call to native
       
  2125 
       
  2126   const Register oop_handle_reg = r14;
       
  2127 
       
  2128   if (is_critical_native) {
       
  2129     check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args,
       
  2130                                        oop_handle_offset, oop_maps, in_regs, in_sig_bt);
       
  2131   }
       
  2132 
       
  2133   //
       
  2134   // We immediately shuffle the arguments so that any vm call we have to
       
  2135   // make from here on out (sync slow path, jvmti, etc.) we will have
       
  2136   // captured the oops from our caller and have a valid oopMap for
       
  2137   // them.
       
  2138 
       
  2139   // -----------------
       
  2140   // The Grand Shuffle
       
  2141 
       
  2142   // The Java calling convention is either equal (linux) or denser (win64) than the
       
   2143   // c calling convention. However, because of the jni_env argument the c calling
       
  2144   // convention always has at least one more (and two for static) arguments than Java.
       
  2145   // Therefore if we move the args from java -> c backwards then we will never have
       
  2146   // a register->register conflict and we don't have to build a dependency graph
       
  2147   // and figure out how to break any cycles.
       
  2148   //
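          // Editor's illustration (not part of the original source): for a non-static
          // native taking one int, the mapping built below is
          //
          //     java arg index:  0 (receiver)   1 (the int)
          //     c arg index:     1              2            (c index 0 is the JNIEnv*)
          //
          // Walking the args backwards (c_arg 2 <- java 1, then c_arg 1 <- java 0)
          // never overwrites a source that has not yet been read, so the regular JNI
          // case needs no temporaries and no cycle breaking.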
       
  2149 
       
  2150   // Record esp-based slot for receiver on stack for non-static methods
       
  2151   int receiver_offset = -1;
       
  2152 
       
  2153   // This is a trick. We double the stack slots so we can claim
       
  2154   // the oops in the caller's frame. Since we are sure to have
       
   2155   // more args than the caller, doubling is enough to make
       
  2156   // sure we can capture all the incoming oop args from the
       
  2157   // caller.
       
  2158   //
       
  2159   OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
       
  2160 
       
  2161   // Mark location of rbp (someday)
       
  2162   // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(rbp));
       
  2163 
       
  2164   // Use eax, ebx as temporaries during any memory-memory moves we have to do
       
  2165   // All inbound args are referenced based on rbp and all outbound args via rsp.
       
  2166 
       
  2167 
       
  2168 #ifdef ASSERT
       
  2169   bool reg_destroyed[RegisterImpl::number_of_registers];
       
  2170   bool freg_destroyed[XMMRegisterImpl::number_of_registers];
       
  2171   for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
       
  2172     reg_destroyed[r] = false;
       
  2173   }
       
  2174   for ( int f = 0 ; f < XMMRegisterImpl::number_of_registers ; f++ ) {
       
  2175     freg_destroyed[f] = false;
       
  2176   }
       
  2177 
       
  2178 #endif /* ASSERT */
       
  2179 
       
  2180   // This may iterate in two different directions depending on the
       
  2181   // kind of native it is.  The reason is that for regular JNI natives
       
  2182   // the incoming and outgoing registers are offset upwards and for
       
  2183   // critical natives they are offset down.
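          // Editor's note (illustrative, not in the original change): for a regular JNI
          // native with total_in_args == 3 and total_c_args == 4, the loop below pushes
          // the pairs (2,3), (1,2), (0,1) into arg_order, i.e. the moves run from the
          // last argument back towards the first.  For critical natives ComputeMoveOrder
          // may instead emit pairs containing -1 so that a value is routed through
          // tmp_vmreg to break a cycle; those pairs are handled in the loop over
          // arg_order further down.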
       
  2184   GrowableArray<int> arg_order(2 * total_in_args);
       
  2185   VMRegPair tmp_vmreg;
       
  2186   tmp_vmreg.set1(rbx->as_VMReg());
       
  2187 
       
  2188   if (!is_critical_native) {
       
  2189     for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) {
       
  2190       arg_order.push(i);
       
  2191       arg_order.push(c_arg);
       
  2192     }
       
  2193   } else {
       
  2194     // Compute a valid move order, using tmp_vmreg to break any cycles
       
  2195     ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg);
       
  2196   }
       
  2197 
       
  2198   int temploc = -1;
       
  2199   for (int ai = 0; ai < arg_order.length(); ai += 2) {
       
  2200     int i = arg_order.at(ai);
       
  2201     int c_arg = arg_order.at(ai + 1);
       
  2202     __ block_comment(err_msg("move %d -> %d", i, c_arg));
       
  2203     if (c_arg == -1) {
       
  2204       assert(is_critical_native, "should only be required for critical natives");
       
  2205       // This arg needs to be moved to a temporary
       
  2206       __ mov(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register());
       
  2207       in_regs[i] = tmp_vmreg;
       
  2208       temploc = i;
       
  2209       continue;
       
  2210     } else if (i == -1) {
       
  2211       assert(is_critical_native, "should only be required for critical natives");
       
  2212       // Read from the temporary location
       
  2213       assert(temploc != -1, "must be valid");
       
  2214       i = temploc;
       
  2215       temploc = -1;
       
  2216     }
       
  2217 #ifdef ASSERT
       
  2218     if (in_regs[i].first()->is_Register()) {
       
  2219       assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
       
  2220     } else if (in_regs[i].first()->is_XMMRegister()) {
       
  2221       assert(!freg_destroyed[in_regs[i].first()->as_XMMRegister()->encoding()], "destroyed reg!");
       
  2222     }
       
  2223     if (out_regs[c_arg].first()->is_Register()) {
       
  2224       reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
       
  2225     } else if (out_regs[c_arg].first()->is_XMMRegister()) {
       
  2226       freg_destroyed[out_regs[c_arg].first()->as_XMMRegister()->encoding()] = true;
       
  2227     }
       
  2228 #endif /* ASSERT */
       
  2229     switch (in_sig_bt[i]) {
       
  2230       case T_ARRAY:
       
  2231         if (is_critical_native) {
       
  2232           unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
       
  2233           c_arg++;
       
  2234 #ifdef ASSERT
       
  2235           if (out_regs[c_arg].first()->is_Register()) {
       
  2236             reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
       
  2237           } else if (out_regs[c_arg].first()->is_XMMRegister()) {
       
  2238             freg_destroyed[out_regs[c_arg].first()->as_XMMRegister()->encoding()] = true;
       
  2239           }
       
  2240 #endif
       
  2241           break;
       
  2242         }
       
  2243       case T_OBJECT:
       
  2244         assert(!is_critical_native, "no oop arguments");
       
  2245         object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
       
  2246                     ((i == 0) && (!is_static)),
       
  2247                     &receiver_offset);
       
  2248         break;
       
  2249       case T_VOID:
       
  2250         break;
       
  2251 
       
  2252       case T_FLOAT:
       
  2253         float_move(masm, in_regs[i], out_regs[c_arg]);
       
   2254         break;
       
  2255 
       
  2256       case T_DOUBLE:
       
  2257         assert( i + 1 < total_in_args &&
       
  2258                 in_sig_bt[i + 1] == T_VOID &&
       
  2259                 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
       
  2260         double_move(masm, in_regs[i], out_regs[c_arg]);
       
  2261         break;
       
  2262 
       
  2263       case T_LONG :
       
  2264         long_move(masm, in_regs[i], out_regs[c_arg]);
       
  2265         break;
       
  2266 
       
  2267       case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
       
  2268 
       
  2269       default:
       
  2270         move32_64(masm, in_regs[i], out_regs[c_arg]);
       
  2271     }
       
  2272   }
       
  2273 
       
  2274   int c_arg;
       
  2275 
       
  2276   // Pre-load a static method's oop into r14.  Used both by locking code and
       
  2277   // the normal JNI call code.
       
  2278   if (!is_critical_native) {
       
  2279     // point c_arg at the first arg that is already loaded in case we
       
  2280     // need to spill before we call out
       
  2281     c_arg = total_c_args - total_in_args;
       
  2282 
       
  2283     if (method->is_static()) {
       
  2284 
       
  2285       //  load oop into a register
       
  2286       __ movoop(oop_handle_reg, JNIHandles::make_local(method->method_holder()->java_mirror()));
       
  2287 
       
   2288       // Now handlize the static class mirror; it's known not-null.
       
  2289       __ movptr(Address(rsp, klass_offset), oop_handle_reg);
       
  2290       map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
       
  2291 
       
  2292       // Now get the handle
       
  2293       __ lea(oop_handle_reg, Address(rsp, klass_offset));
       
  2294       // store the klass handle as second argument
       
  2295       __ movptr(c_rarg1, oop_handle_reg);
       
  2296       // and protect the arg if we must spill
       
  2297       c_arg--;
       
  2298     }
       
  2299   } else {
       
  2300     // For JNI critical methods we need to save all registers in save_args.
       
  2301     c_arg = 0;
       
  2302   }
       
  2303 
       
  2304   // Change state to native (we save the return address in the thread, since it might not
       
   2305   // be pushed on the stack when we do a stack traversal). It is enough that the pc()
       
  2306   // points into the right code segment. It does not have to be the correct return pc.
       
  2307   // We use the same pc/oopMap repeatedly when we call out
       
  2308 
       
  2309   intptr_t the_pc = (intptr_t) __ pc();
       
  2310   oop_maps->add_gc_map(the_pc - start, map);
       
  2311 
       
  2312   __ set_last_Java_frame(rsp, noreg, (address)the_pc);
       
  2313 
       
  2314 
       
   2315   // We have all of the arguments set up at this point. We must not touch any register
        
   2316   // argument registers at this point (if we had to save/restore them there would be no oopMap for them).
       
  2317 
       
  2318   {
       
  2319     SkipIfEqual skip(masm, &DTraceMethodProbes, false);
       
  2320     // protect the args we've loaded
       
  2321     save_args(masm, total_c_args, c_arg, out_regs);
       
  2322     __ mov_metadata(c_rarg1, method());
       
  2323     __ call_VM_leaf(
       
  2324       CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
       
  2325       r15_thread, c_rarg1);
       
  2326     restore_args(masm, total_c_args, c_arg, out_regs);
       
  2327   }
       
  2328 
       
  2329   // RedefineClasses() tracing support for obsolete method entry
       
  2330   if (log_is_enabled(Trace, redefine, class, obsolete)) {
       
  2331     // protect the args we've loaded
       
  2332     save_args(masm, total_c_args, c_arg, out_regs);
       
  2333     __ mov_metadata(c_rarg1, method());
       
  2334     __ call_VM_leaf(
       
  2335       CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
       
  2336       r15_thread, c_rarg1);
       
  2337     restore_args(masm, total_c_args, c_arg, out_regs);
       
  2338   }
       
  2339 
       
  2340   // Lock a synchronized method
       
  2341 
       
  2342   // Register definitions used by locking and unlocking
       
  2343 
       
  2344   const Register swap_reg = rax;  // Must use rax for cmpxchg instruction
       
  2345   const Register obj_reg  = rbx;  // Will contain the oop
       
  2346   const Register lock_reg = r13;  // Address of compiler lock object (BasicLock)
       
  2347   const Register old_hdr  = r13;  // value of old header at unlock time
       
  2348 
       
  2349   Label slow_path_lock;
       
  2350   Label lock_done;
       
  2351 
       
  2352   if (method->is_synchronized()) {
       
  2353     assert(!is_critical_native, "unhandled");
       
  2354 
       
  2355 
       
  2356     const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
       
  2357 
       
  2358     // Get the handle (the 2nd argument)
       
  2359     __ mov(oop_handle_reg, c_rarg1);
       
  2360 
       
  2361     // Get address of the box
       
  2362 
       
  2363     __ lea(lock_reg, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size));
       
  2364 
       
  2365     // Load the oop from the handle
       
  2366     __ movptr(obj_reg, Address(oop_handle_reg, 0));
       
  2367 
       
  2368     if (UseBiasedLocking) {
       
  2369       __ biased_locking_enter(lock_reg, obj_reg, swap_reg, rscratch1, false, lock_done, &slow_path_lock);
       
  2370     }
       
  2371 
       
  2372     // Load immediate 1 into swap_reg %rax
       
  2373     __ movl(swap_reg, 1);
       
  2374 
       
  2375     // Load (object->mark() | 1) into swap_reg %rax
       
  2376     __ orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
       
  2377 
       
  2378     // Save (object->mark() | 1) into BasicLock's displaced header
       
  2379     __ movptr(Address(lock_reg, mark_word_offset), swap_reg);
       
  2380 
       
  2381     if (os::is_MP()) {
       
  2382       __ lock();
       
  2383     }
       
  2384 
       
  2385     // src -> dest iff dest == rax else rax <- dest
       
  2386     __ cmpxchgptr(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
       
  2387     __ jcc(Assembler::equal, lock_done);
       
  2388 
       
  2389     // Hmm should this move to the slow path code area???
       
  2390 
       
  2391     // Test if the oopMark is an obvious stack pointer, i.e.,
       
  2392     //  1) (mark & 3) == 0, and
       
   2392     //  2) rsp <= mark < rsp + os::pagesize()
       
  2394     // These 3 tests can be done by evaluating the following
       
  2395     // expression: ((mark - rsp) & (3 - os::vm_page_size())),
       
  2396     // assuming both stack pointer and pagesize have their
       
  2397     // least significant 2 bits clear.
       
  2398     // NOTE: the oopMark is in swap_reg %rax as the result of cmpxchg
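            // Editor's worked example (illustrative only, assuming a 4K page size):
            //   3 - os::vm_page_size() = 3 - 0x1000 = 0x...FFFFF003 viewed as a 64-bit mask,
            //   so ((mark - rsp) & (3 - os::vm_page_size())) == 0 exactly when bits [1:0]
            //   of (mark - rsp) are clear (condition 1, since rsp's low bits are zero) and
            //   bits [63:12] are clear, i.e. 0 <= mark - rsp < 4096 (condition 2).  A zero
            //   result therefore means the mark already points into our own stack page,
            //   the recursive-lock case recorded below.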
       
  2399 
       
  2400     __ subptr(swap_reg, rsp);
       
  2401     __ andptr(swap_reg, 3 - os::vm_page_size());
       
  2402 
       
   2403     // Save the test result; for the recursive case the result is zero
       
  2404     __ movptr(Address(lock_reg, mark_word_offset), swap_reg);
       
  2405     __ jcc(Assembler::notEqual, slow_path_lock);
       
  2406 
       
  2407     // Slow path will re-enter here
       
  2408 
       
  2409     __ bind(lock_done);
       
  2410   }
       
  2411 
       
  2412 
       
  2413   // Finally just about ready to make the JNI call
       
  2414 
       
  2415 
       
  2416   // get JNIEnv* which is first argument to native
       
  2417   if (!is_critical_native) {
       
  2418     __ lea(c_rarg0, Address(r15_thread, in_bytes(JavaThread::jni_environment_offset())));
       
  2419   }
       
  2420 
       
  2421   // Now set thread in native
       
  2422   __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native);
       
  2423 
       
  2424   __ call(RuntimeAddress(native_func));
       
  2425 
       
  2426   // Verify or restore cpu control state after JNI call
       
  2427   __ restore_cpu_control_state_after_jni();
       
  2428 
       
  2429   // Unpack native results.
       
  2430   switch (ret_type) {
       
  2431   case T_BOOLEAN: __ c2bool(rax);            break;
       
  2432   case T_CHAR   : __ movzwl(rax, rax);      break;
       
  2433   case T_BYTE   : __ sign_extend_byte (rax); break;
       
  2434   case T_SHORT  : __ sign_extend_short(rax); break;
       
  2435   case T_INT    : /* nothing to do */        break;
       
  2436   case T_DOUBLE :
       
  2437   case T_FLOAT  :
       
   2438     // Result is in xmm0; we'll save as needed
       
  2439     break;
       
  2440   case T_ARRAY:                 // Really a handle
       
  2441   case T_OBJECT:                // Really a handle
       
  2442       break; // can't de-handlize until after safepoint check
       
  2443   case T_VOID: break;
       
  2444   case T_LONG: break;
       
  2445   default       : ShouldNotReachHere();
       
  2446   }
       
  2447 
       
  2448   // Switch thread to "native transition" state before reading the synchronization state.
       
  2449   // This additional state is necessary because reading and testing the synchronization
       
  2450   // state is not atomic w.r.t. GC, as this scenario demonstrates:
       
  2451   //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
       
  2452   //     VM thread changes sync state to synchronizing and suspends threads for GC.
       
  2453   //     Thread A is resumed to finish this native method, but doesn't block here since it
       
   2454   //     didn't see any synchronization in progress, and escapes.
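          // Editor's note (illustrative, not from the original change): with the extra
          // _thread_in_native_trans state the interleaving above becomes safe, because
          // the state store below is made visible before the synchronization state is
          // read:
          //     thread_state = _thread_in_native_trans;        // movl just below
          //     fence / serialization page write;              // os::is_MP() block
          //     if (safepoint in progress or suspend pending)  // cmp32 / cmpl below
          //       check_special_condition_for_native_trans();  // blocks until safe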
       
  2455   __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native_trans);
       
  2456 
       
   2457   if (os::is_MP()) {
       
  2458     if (UseMembar) {
       
  2459       // Force this write out before the read below
       
  2460       __ membar(Assembler::Membar_mask_bits(
       
  2461            Assembler::LoadLoad | Assembler::LoadStore |
       
  2462            Assembler::StoreLoad | Assembler::StoreStore));
       
  2463     } else {
       
  2464       // Write serialization page so VM thread can do a pseudo remote membar.
       
  2465       // We use the current thread pointer to calculate a thread specific
       
  2466       // offset to write to within the page. This minimizes bus traffic
       
  2467       // due to cache line collision.
       
  2468       __ serialize_memory(r15_thread, rcx);
       
  2469     }
       
  2470   }
       
  2471 
       
  2472   Label after_transition;
       
  2473 
       
  2474   // check for safepoint operation in progress and/or pending suspend requests
       
  2475   {
       
  2476     Label Continue;
       
  2477 
       
  2478     __ cmp32(ExternalAddress((address)SafepointSynchronize::address_of_state()),
       
  2479              SafepointSynchronize::_not_synchronized);
       
  2480 
       
  2481     Label L;
       
  2482     __ jcc(Assembler::notEqual, L);
       
  2483     __ cmpl(Address(r15_thread, JavaThread::suspend_flags_offset()), 0);
       
  2484     __ jcc(Assembler::equal, Continue);
       
  2485     __ bind(L);
       
  2486 
       
  2487     // Don't use call_VM as it will see a possible pending exception and forward it
       
  2488     // and never return here preventing us from clearing _last_native_pc down below.
       
  2489     // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are
       
  2490     // preserved and correspond to the bcp/locals pointers. So we do a runtime call
       
  2491     // by hand.
       
  2492     //
       
  2493     __ vzeroupper();
       
  2494     save_native_result(masm, ret_type, stack_slots);
       
  2495     __ mov(c_rarg0, r15_thread);
       
  2496     __ mov(r12, rsp); // remember sp
       
  2497     __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
       
  2498     __ andptr(rsp, -16); // align stack as required by ABI
       
  2499     if (!is_critical_native) {
       
  2500       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
       
  2501     } else {
       
  2502       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)));
       
  2503     }
       
  2504     __ mov(rsp, r12); // restore sp
       
  2505     __ reinit_heapbase();
       
  2506     // Restore any method result value
       
  2507     restore_native_result(masm, ret_type, stack_slots);
       
  2508 
       
  2509     if (is_critical_native) {
       
  2510       // The call above performed the transition to thread_in_Java so
       
  2511       // skip the transition logic below.
       
  2512       __ jmpb(after_transition);
       
  2513     }
       
  2514 
       
  2515     __ bind(Continue);
       
  2516   }
       
  2517 
       
  2518   // change thread state
       
  2519   __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_Java);
       
  2520   __ bind(after_transition);
       
  2521 
       
  2522   Label reguard;
       
  2523   Label reguard_done;
       
  2524   __ cmpl(Address(r15_thread, JavaThread::stack_guard_state_offset()), JavaThread::stack_guard_yellow_reserved_disabled);
       
  2525   __ jcc(Assembler::equal, reguard);
       
  2526   __ bind(reguard_done);
       
  2527 
       
  2528   // native result if any is live
       
  2529 
       
  2530   // Unlock
       
  2531   Label unlock_done;
       
  2532   Label slow_path_unlock;
       
  2533   if (method->is_synchronized()) {
       
  2534 
       
  2535     // Get locked oop from the handle we passed to jni
       
  2536     __ movptr(obj_reg, Address(oop_handle_reg, 0));
       
  2537 
       
  2538     Label done;
       
  2539 
       
  2540     if (UseBiasedLocking) {
       
  2541       __ biased_locking_exit(obj_reg, old_hdr, done);
       
  2542     }
       
  2543 
       
  2544     // Simple recursive lock?
       
  2545 
       
  2546     __ cmpptr(Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size), (int32_t)NULL_WORD);
       
  2547     __ jcc(Assembler::equal, done);
       
  2548 
       
   2549     // Must save rax if it is live now because cmpxchg must use it
       
  2550     if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
       
  2551       save_native_result(masm, ret_type, stack_slots);
       
  2552     }
       
  2553 
       
  2554 
       
  2555     // get address of the stack lock
       
  2556     __ lea(rax, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size));
       
  2557     //  get old displaced header
       
  2558     __ movptr(old_hdr, Address(rax, 0));
       
  2559 
       
  2560     // Atomic swap old header if oop still contains the stack lock
       
  2561     if (os::is_MP()) {
       
  2562       __ lock();
       
  2563     }
       
  2564     __ cmpxchgptr(old_hdr, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
       
  2565     __ jcc(Assembler::notEqual, slow_path_unlock);
       
  2566 
       
  2567     // slow path re-enters here
       
  2568     __ bind(unlock_done);
       
  2569     if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
       
  2570       restore_native_result(masm, ret_type, stack_slots);
       
  2571     }
       
  2572 
       
  2573     __ bind(done);
       
  2574 
       
  2575   }
       
  2576   {
       
  2577     SkipIfEqual skip(masm, &DTraceMethodProbes, false);
       
  2578     save_native_result(masm, ret_type, stack_slots);
       
  2579     __ mov_metadata(c_rarg1, method());
       
  2580     __ call_VM_leaf(
       
  2581          CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
       
  2582          r15_thread, c_rarg1);
       
  2583     restore_native_result(masm, ret_type, stack_slots);
       
  2584   }
       
  2585 
       
  2586   __ reset_last_Java_frame(false);
       
  2587 
       
  2588   // Unbox oop result, e.g. JNIHandles::resolve value.
       
  2589   if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
       
  2590     __ resolve_jobject(rax /* value */,
       
  2591                        r15_thread /* thread */,
       
  2592                        rcx /* tmp */);
       
  2593   }
       
  2594 
       
  2595   if (CheckJNICalls) {
       
  2596     // clear_pending_jni_exception_check
       
  2597     __ movptr(Address(r15_thread, JavaThread::pending_jni_exception_check_fn_offset()), NULL_WORD);
       
  2598   }
       
  2599 
       
  2600   if (!is_critical_native) {
       
  2601     // reset handle block
       
  2602     __ movptr(rcx, Address(r15_thread, JavaThread::active_handles_offset()));
       
  2603     __ movl(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), (int32_t)NULL_WORD);
       
  2604   }
       
  2605 
       
  2606   // pop our frame
       
  2607 
       
  2608   __ leave();
       
  2609 
       
  2610   if (!is_critical_native) {
       
  2611     // Any exception pending?
       
  2612     __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
       
  2613     __ jcc(Assembler::notEqual, exception_pending);
       
  2614   }
       
  2615 
       
  2616   // Return
       
  2617 
       
  2618   __ ret(0);
       
  2619 
       
  2620   // Unexpected paths are out of line and go here
       
  2621 
       
  2622   if (!is_critical_native) {
       
  2623     // forward the exception
       
  2624     __ bind(exception_pending);
       
  2625 
       
  2626     // and forward the exception
       
  2627     __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
       
  2628   }
       
  2629 
       
  2630   // Slow path locking & unlocking
       
  2631   if (method->is_synchronized()) {
       
  2632 
       
  2633     // BEGIN Slow path lock
       
  2634     __ bind(slow_path_lock);
       
  2635 
       
   2636     // has last_Java_frame setup. No exceptions, so do a vanilla call, not call_VM
       
  2637     // args are (oop obj, BasicLock* lock, JavaThread* thread)
       
  2638 
       
  2639     // protect the args we've loaded
       
  2640     save_args(masm, total_c_args, c_arg, out_regs);
       
  2641 
       
  2642     __ mov(c_rarg0, obj_reg);
       
  2643     __ mov(c_rarg1, lock_reg);
       
  2644     __ mov(c_rarg2, r15_thread);
       
  2645 
       
  2646     // Not a leaf but we have last_Java_frame setup as we want
       
  2647     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3);
       
  2648     restore_args(masm, total_c_args, c_arg, out_regs);
       
  2649 
       
  2650 #ifdef ASSERT
       
  2651     { Label L;
       
  2652     __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
       
  2653     __ jcc(Assembler::equal, L);
       
  2654     __ stop("no pending exception allowed on exit from monitorenter");
       
  2655     __ bind(L);
       
  2656     }
       
  2657 #endif
       
  2658     __ jmp(lock_done);
       
  2659 
       
  2660     // END Slow path lock
       
  2661 
       
  2662     // BEGIN Slow path unlock
       
  2663     __ bind(slow_path_unlock);
       
  2664 
       
  2665     // If we haven't already saved the native result we must save it now as xmm registers
       
  2666     // are still exposed.
       
  2667     __ vzeroupper();
       
  2668     if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
       
  2669       save_native_result(masm, ret_type, stack_slots);
       
  2670     }
       
  2671 
       
  2672     __ lea(c_rarg1, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size));
       
  2673 
       
  2674     __ mov(c_rarg0, obj_reg);
       
  2675     __ mov(c_rarg2, r15_thread);
       
  2676     __ mov(r12, rsp); // remember sp
       
  2677     __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
       
  2678     __ andptr(rsp, -16); // align stack as required by ABI
       
  2679 
       
  2680     // Save pending exception around call to VM (which contains an EXCEPTION_MARK)
       
  2681     // NOTE that obj_reg == rbx currently
       
  2682     __ movptr(rbx, Address(r15_thread, in_bytes(Thread::pending_exception_offset())));
       
  2683     __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
       
  2684 
       
  2685     // args are (oop obj, BasicLock* lock, JavaThread* thread)
       
  2686     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)));
       
  2687     __ mov(rsp, r12); // restore sp
       
  2688     __ reinit_heapbase();
       
  2689 #ifdef ASSERT
       
  2690     {
       
  2691       Label L;
       
  2692       __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int)NULL_WORD);
       
  2693       __ jcc(Assembler::equal, L);
       
  2694       __ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
       
  2695       __ bind(L);
       
  2696     }
       
  2697 #endif /* ASSERT */
       
  2698 
       
  2699     __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), rbx);
       
  2700 
       
  2701     if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
       
  2702       restore_native_result(masm, ret_type, stack_slots);
       
  2703     }
       
  2704     __ jmp(unlock_done);
       
  2705 
       
  2706     // END Slow path unlock
       
  2707 
       
  2708   } // synchronized
       
  2709 
       
  2710   // SLOW PATH Reguard the stack if needed
       
  2711 
       
  2712   __ bind(reguard);
       
  2713   __ vzeroupper();
       
  2714   save_native_result(masm, ret_type, stack_slots);
       
  2715   __ mov(r12, rsp); // remember sp
       
  2716   __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
       
  2717   __ andptr(rsp, -16); // align stack as required by ABI
       
  2718   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)));
       
  2719   __ mov(rsp, r12); // restore sp
       
  2720   __ reinit_heapbase();
       
  2721   restore_native_result(masm, ret_type, stack_slots);
       
  2722   // and continue
       
  2723   __ jmp(reguard_done);
       
  2724 
       
  2725 
       
  2726 
       
  2727   __ flush();
       
  2728 
       
  2729   nmethod *nm = nmethod::new_native_nmethod(method,
       
  2730                                             compile_id,
       
  2731                                             masm->code(),
       
  2732                                             vep_offset,
       
  2733                                             frame_complete,
       
  2734                                             stack_slots / VMRegImpl::slots_per_word,
       
  2735                                             (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
       
  2736                                             in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
       
  2737                                             oop_maps);
       
  2738 
       
  2739   if (is_critical_native) {
       
  2740     nm->set_lazy_critical_native(true);
       
  2741   }
       
  2742 
       
  2743   return nm;
       
  2744 
       
  2745 }
       
  2746 
       
   2747 // this function returns the adjustment (in number of words) to a c2i adapter
       
  2748 // activation for use during deoptimization
       
  2749 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals ) {
       
  2750   return (callee_locals - callee_parameters) * Interpreter::stackElementWords;
       
  2751 }
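        // Editor's worked example (not in the original source): with
        // callee_parameters == 2 and callee_locals == 5 the adjustment is
        // (5 - 2) * Interpreter::stackElementWords, i.e. room for the three
        // locals that are not passed in as parameters.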
       
  2752 
       
  2753 
       
  2754 uint SharedRuntime::out_preserve_stack_slots() {
       
  2755   return 0;
       
  2756 }
       
  2757 
       
  2758 //------------------------------generate_deopt_blob----------------------------
       
  2759 void SharedRuntime::generate_deopt_blob() {
       
  2760   // Allocate space for the code
       
  2761   ResourceMark rm;
       
  2762   // Setup code generation tools
       
  2763   int pad = 0;
       
  2764 #if INCLUDE_JVMCI
       
  2765   if (EnableJVMCI || UseAOT) {
       
  2766     pad += 512; // Increase the buffer size when compiling for JVMCI
       
  2767   }
       
  2768 #endif
       
  2769   CodeBuffer buffer("deopt_blob", 2048+pad, 1024);
       
  2770   MacroAssembler* masm = new MacroAssembler(&buffer);
       
  2771   int frame_size_in_words;
       
  2772   OopMap* map = NULL;
       
  2773   OopMapSet *oop_maps = new OopMapSet();
       
  2774 
       
  2775   // -------------
       
  2776   // This code enters when returning to a de-optimized nmethod.  A return
       
   2777   // address has been pushed on the stack, and return values are in
       
  2778   // registers.
       
  2779   // If we are doing a normal deopt then we were called from the patched
       
  2780   // nmethod from the point we returned to the nmethod. So the return
       
  2781   // address on the stack is wrong by NativeCall::instruction_size
       
  2782   // We will adjust the value so it looks like we have the original return
       
  2783   // address on the stack (like when we eagerly deoptimized).
       
  2784   // In the case of an exception pending when deoptimizing, we enter
       
  2785   // with a return address on the stack that points after the call we patched
       
  2786   // into the exception handler. We have the following register state from,
       
  2787   // e.g., the forward exception stub (see stubGenerator_x86_64.cpp).
       
  2788   //    rax: exception oop
       
  2789   //    rbx: exception handler
       
  2790   //    rdx: throwing pc
       
  2791   // So in this case we simply jam rdx into the useless return address and
       
  2792   // the stack looks just like we want.
       
  2793   //
       
  2794   // At this point we need to de-opt.  We save the argument return
       
  2795   // registers.  We call the first C routine, fetch_unroll_info().  This
       
  2796   // routine captures the return values and returns a structure which
       
  2797   // describes the current frame size and the sizes of all replacement frames.
       
  2798   // The current frame is compiled code and may contain many inlined
       
  2799   // functions, each with their own JVM state.  We pop the current frame, then
       
  2800   // push all the new frames.  Then we call the C routine unpack_frames() to
       
  2801   // populate these frames.  Finally unpack_frames() returns us the new target
       
  2802   // address.  Notice that callee-save registers are BLOWN here; they have
       
  2803   // already been captured in the vframeArray at the time the return PC was
       
  2804   // patched.
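          // Editor's sketch of the flow this blob emits (an illustrative summary of the
          // comment above, not part of the original change):
          //   save_live_registers();                            // capture return values
          //   info = Deoptimization::fetch_unroll_info(...);    // describe replacement frames
          //   pop the deoptimized compiled frame;
          //   for each frame size/pc in info:
          //     push a skeletal interpreter frame;
          //   Deoptimization::unpack_frames(thread, exec_mode); // fill the frames in
          //   ret into the topmost interpreter frame;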
       
  2805   address start = __ pc();
       
  2806   Label cont;
       
  2807 
       
  2808   // Prolog for non exception case!
       
  2809 
       
  2810   // Save everything in sight.
       
  2811   map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
       
  2812 
       
  2813   // Normal deoptimization.  Save exec mode for unpack_frames.
       
  2814   __ movl(r14, Deoptimization::Unpack_deopt); // callee-saved
       
  2815   __ jmp(cont);
       
  2816 
       
  2817   int reexecute_offset = __ pc() - start;
       
  2818 #if INCLUDE_JVMCI && !defined(COMPILER1)
       
  2819   if (EnableJVMCI && UseJVMCICompiler) {
       
  2820     // JVMCI does not use this kind of deoptimization
       
  2821     __ should_not_reach_here();
       
  2822   }
       
  2823 #endif
       
  2824 
       
  2825   // Reexecute case
       
   2826   // return address is the pc that describes what bci to re-execute at
       
  2827 
       
  2828   // No need to update map as each call to save_live_registers will produce identical oopmap
       
  2829   (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
       
  2830 
       
  2831   __ movl(r14, Deoptimization::Unpack_reexecute); // callee-saved
       
  2832   __ jmp(cont);
       
  2833 
       
  2834 #if INCLUDE_JVMCI
       
  2835   Label after_fetch_unroll_info_call;
       
  2836   int implicit_exception_uncommon_trap_offset = 0;
       
  2837   int uncommon_trap_offset = 0;
       
  2838 
       
  2839   if (EnableJVMCI || UseAOT) {
       
  2840     implicit_exception_uncommon_trap_offset = __ pc() - start;
       
  2841 
       
  2842     __ pushptr(Address(r15_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())));
       
  2843     __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())), (int32_t)NULL_WORD);
       
  2844 
       
  2845     uncommon_trap_offset = __ pc() - start;
       
  2846 
       
  2847     // Save everything in sight.
       
  2848     RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
       
  2849     // fetch_unroll_info needs to call last_java_frame()
       
  2850     __ set_last_Java_frame(noreg, noreg, NULL);
       
  2851 
       
  2852     __ movl(c_rarg1, Address(r15_thread, in_bytes(JavaThread::pending_deoptimization_offset())));
       
  2853     __ movl(Address(r15_thread, in_bytes(JavaThread::pending_deoptimization_offset())), -1);
       
  2854 
       
  2855     __ movl(r14, (int32_t)Deoptimization::Unpack_reexecute);
       
  2856     __ mov(c_rarg0, r15_thread);
       
  2857     __ movl(c_rarg2, r14); // exec mode
       
  2858     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap)));
       
  2859     oop_maps->add_gc_map( __ pc()-start, map->deep_copy());
       
  2860 
       
  2861     __ reset_last_Java_frame(false);
       
  2862 
       
  2863     __ jmp(after_fetch_unroll_info_call);
       
  2864   } // EnableJVMCI
       
  2865 #endif // INCLUDE_JVMCI
       
  2866 
       
  2867   int exception_offset = __ pc() - start;
       
  2868 
       
  2869   // Prolog for exception case
       
  2870 
       
  2871   // all registers are dead at this entry point, except for rax, and
       
  2872   // rdx which contain the exception oop and exception pc
       
  2873   // respectively.  Set them in TLS and fall thru to the
       
  2874   // unpack_with_exception_in_tls entry point.
       
  2875 
       
  2876   __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), rdx);
       
  2877   __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), rax);
       
  2878 
       
  2879   int exception_in_tls_offset = __ pc() - start;
       
  2880 
       
  2881   // new implementation because exception oop is now passed in JavaThread
       
  2882 
       
  2883   // Prolog for exception case
       
  2884   // All registers must be preserved because they might be used by LinearScan
       
   2885   // Exception oop and throwing PC are passed in JavaThread
       
  2886   // tos: stack at point of call to method that threw the exception (i.e. only
       
  2887   // args are on the stack, no return address)
       
  2888 
       
  2889   // make room on stack for the return address
       
  2890   // It will be patched later with the throwing pc. The correct value is not
       
  2891   // available now because loading it from memory would destroy registers.
       
  2892   __ push(0);
       
  2893 
       
  2894   // Save everything in sight.
       
  2895   map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
       
  2896 
       
  2897   // Now it is safe to overwrite any register
       
  2898 
       
  2899   // Deopt during an exception.  Save exec mode for unpack_frames.
       
  2900   __ movl(r14, Deoptimization::Unpack_exception); // callee-saved
       
  2901 
       
  2902   // load throwing pc from JavaThread and patch it as the return address
       
  2903   // of the current frame. Then clear the field in JavaThread
       
  2904 
       
  2905   __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset()));
       
  2906   __ movptr(Address(rbp, wordSize), rdx);
       
  2907   __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int32_t)NULL_WORD);
       
  2908 
       
  2909 #ifdef ASSERT
       
  2910   // verify that there is really an exception oop in JavaThread
       
  2911   __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset()));
       
  2912   __ verify_oop(rax);
       
  2913 
       
  2914   // verify that there is no pending exception
       
  2915   Label no_pending_exception;
       
  2916   __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
       
  2917   __ testptr(rax, rax);
       
  2918   __ jcc(Assembler::zero, no_pending_exception);
       
  2919   __ stop("must not have pending exception here");
       
  2920   __ bind(no_pending_exception);
       
  2921 #endif
       
  2922 
       
  2923   __ bind(cont);
       
  2924 
       
  2925   // Call C code.  Need thread and this frame, but NOT official VM entry
       
  2926   // crud.  We cannot block on this call, no GC can happen.
       
  2927   //
       
  2928   // UnrollBlock* fetch_unroll_info(JavaThread* thread)
       
  2929 
       
  2930   // fetch_unroll_info needs to call last_java_frame().
       
  2931 
       
  2932   __ set_last_Java_frame(noreg, noreg, NULL);
       
  2933 #ifdef ASSERT
       
  2934   { Label L;
       
  2935     __ cmpptr(Address(r15_thread,
       
  2936                     JavaThread::last_Java_fp_offset()),
       
  2937             (int32_t)0);
       
  2938     __ jcc(Assembler::equal, L);
       
  2939     __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared");
       
  2940     __ bind(L);
       
  2941   }
       
  2942 #endif // ASSERT
       
  2943   __ mov(c_rarg0, r15_thread);
       
  2944   __ movl(c_rarg1, r14); // exec_mode
       
  2945   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
       
  2946 
       
  2947   // Need to have an oopmap that tells fetch_unroll_info where to
       
  2948   // find any register it might need.
       
  2949   oop_maps->add_gc_map(__ pc() - start, map);
       
  2950 
       
  2951   __ reset_last_Java_frame(false);
       
  2952 
       
  2953 #if INCLUDE_JVMCI
       
  2954   if (EnableJVMCI || UseAOT) {
       
  2955     __ bind(after_fetch_unroll_info_call);
       
  2956   }
       
  2957 #endif
       
  2958 
       
  2959   // Load UnrollBlock* into rdi
       
  2960   __ mov(rdi, rax);
       
  2961 
       
  2962   __ movl(r14, Address(rdi, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
       
   2963   Label noException;
       
  2964   __ cmpl(r14, Deoptimization::Unpack_exception);   // Was exception pending?
       
  2965   __ jcc(Assembler::notEqual, noException);
       
  2966   __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset()));
       
  2967   // QQQ this is useless it was NULL above
       
  2968   __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset()));
       
  2969   __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), (int32_t)NULL_WORD);
       
  2970   __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int32_t)NULL_WORD);
       
  2971 
       
  2972   __ verify_oop(rax);
       
  2973 
       
  2974   // Overwrite the result registers with the exception results.
       
  2975   __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax);
       
  2976   // I think this is useless
       
  2977   __ movptr(Address(rsp, RegisterSaver::rdx_offset_in_bytes()), rdx);
       
  2978 
       
  2979   __ bind(noException);
       
  2980 
       
  2981   // Only register save data is on the stack.
       
  2982   // Now restore the result registers.  Everything else is either dead
       
  2983   // or captured in the vframeArray.
       
  2984   RegisterSaver::restore_result_registers(masm);
       
  2985 
       
   2986   // All of the register save area has been popped off the stack. Only the
       
  2987   // return address remains.
       
  2988 
       
  2989   // Pop all the frames we must move/replace.
       
  2990   //
       
  2991   // Frame picture (youngest to oldest)
       
  2992   // 1: self-frame (no frame link)
       
  2993   // 2: deopting frame  (no frame link)
       
  2994   // 3: caller of deopting frame (could be compiled/interpreted).
       
  2995   //
       
  2996   // Note: by leaving the return address of self-frame on the stack
       
  2997   // and using the size of frame 2 to adjust the stack
       
  2998   // when we are done the return to frame 3 will still be on the stack.
       
  2999 
       
  3000   // Pop deoptimized frame
       
  3001   __ movl(rcx, Address(rdi, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
       
  3002   __ addptr(rsp, rcx);
       
  3003 
       
  3004   // rsp should be pointing at the return address to the caller (3)
       
  3005 
       
  3006   // Pick up the initial fp we should save
       
  3007   // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved)
       
  3008   __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
       
  3009 
       
  3010 #ifdef ASSERT
       
   3011   // Compilers generate code that bangs the stack by as much as the
       
  3012   // interpreter would need. So this stack banging should never
       
  3013   // trigger a fault. Verify that it does not on non product builds.
       
  3014   if (UseStackBanging) {
       
  3015     __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
       
  3016     __ bang_stack_size(rbx, rcx);
       
  3017   }
       
  3018 #endif
       
  3019 
       
  3020   // Load address of array of frame pcs into rcx
       
  3021   __ movptr(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
       
  3022 
       
  3023   // Trash the old pc
       
  3024   __ addptr(rsp, wordSize);
       
  3025 
       
  3026   // Load address of array of frame sizes into rsi
       
  3027   __ movptr(rsi, Address(rdi, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
       
  3028 
       
  3029   // Load counter into rdx
       
  3030   __ movl(rdx, Address(rdi, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
       
  3031 
       
  3032   // Now adjust the caller's stack to make up for the extra locals
       
  3033   // but record the original sp so that we can save it in the skeletal interpreter
       
  3034   // frame and the stack walking of interpreter_sender will get the unextended sp
       
  3035   // value and not the "real" sp value.
       
  3036 
       
  3037   const Register sender_sp = r8;
       
  3038 
       
  3039   __ mov(sender_sp, rsp);
       
  3040   __ movl(rbx, Address(rdi,
       
  3041                        Deoptimization::UnrollBlock::
       
  3042                        caller_adjustment_offset_in_bytes()));
       
  3043   __ subptr(rsp, rbx);
       
  3044 
       
  3045   // Push interpreter frames in a loop
       
  3046   Label loop;
       
  3047   __ bind(loop);
       
  3048   __ movptr(rbx, Address(rsi, 0));      // Load frame size
       
  3049   __ subptr(rbx, 2*wordSize);           // We'll push pc and ebp by hand
       
  3050   __ pushptr(Address(rcx, 0));          // Save return address
       
  3051   __ enter();                           // Save old & set new ebp
       
  3052   __ subptr(rsp, rbx);                  // Prolog
       
  3053   // This value is corrected by layout_activation_impl
       
  3054   __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD );
       
  3055   __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), sender_sp); // Make it walkable
       
  3056   __ mov(sender_sp, rsp);               // Pass sender_sp to next frame
       
  3057   __ addptr(rsi, wordSize);             // Bump array pointer (sizes)
       
  3058   __ addptr(rcx, wordSize);             // Bump array pointer (pcs)
       
  3059   __ decrementl(rdx);                   // Decrement counter
       
  3060   __ jcc(Assembler::notZero, loop);
       
  3061   __ pushptr(Address(rcx, 0));          // Save final return address
       
  3062 
       
  3063   // Re-push self-frame
       
  3064   __ enter();                           // Save old & set new ebp
       
  3065 
       
  3066   // Allocate a full sized register save area.
       
  3067   // Return address and rbp are in place, so we allocate two less words.
       
  3068   __ subptr(rsp, (frame_size_in_words - 2) * wordSize);
       
  3069 
       
  3070   // Restore frame locals after moving the frame
       
  3071   __ movdbl(Address(rsp, RegisterSaver::xmm0_offset_in_bytes()), xmm0);
       
  3072   __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax);
       
  3073 
       
  3074   // Call C code.  Need thread but NOT official VM entry
       
  3075   // crud.  We cannot block on this call, no GC can happen.  Call should
       
  3076   // restore return values to their stack-slots with the new SP.
       
  3077   //
       
  3078   // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode)
       
  3079 
       
  3080   // Use rbp because the frames look interpreted now
       
  3081   // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP.
       
  3082   // Don't need the precise return PC here, just precise enough to point into this code blob.
       
  3083   address the_pc = __ pc();
       
  3084   __ set_last_Java_frame(noreg, rbp, the_pc);
       
  3085 
       
  3086   __ andptr(rsp, -(StackAlignmentInBytes));  // Fix stack alignment as required by ABI
       
  3087   __ mov(c_rarg0, r15_thread);
       
  3088   __ movl(c_rarg1, r14); // second arg: exec_mode
       
  3089   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));
       
  3090   // Revert SP alignment after call since we're going to do some SP relative addressing below
       
  3091   __ movptr(rsp, Address(r15_thread, JavaThread::last_Java_sp_offset()));
       
  3092 
       
  3093   // Set an oopmap for the call site
       
  3094   // Use the same PC we used for the last java frame
       
  3095   oop_maps->add_gc_map(the_pc - start,
       
  3096                        new OopMap( frame_size_in_words, 0 ));
       
  3097 
       
  3098   // Clear fp AND pc
       
  3099   __ reset_last_Java_frame(true);
       
  3100 
       
  3101   // Collect return values
       
  3102   __ movdbl(xmm0, Address(rsp, RegisterSaver::xmm0_offset_in_bytes()));
       
  3103   __ movptr(rax, Address(rsp, RegisterSaver::rax_offset_in_bytes()));
       
  3104   // I think this is useless (throwing pc?)
       
  3105   __ movptr(rdx, Address(rsp, RegisterSaver::rdx_offset_in_bytes()));
       
  3106 
       
  3107   // Pop self-frame.
       
  3108   __ leave();                           // Epilog
       
  3109 
       
  3110   // Jump to interpreter
       
  3111   __ ret(0);
       
  3112 
       
  3113   // Make sure all code is generated
       
  3114   masm->flush();
       
  3115 
       
  3116   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
       
  3117   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
       
  3118 #if INCLUDE_JVMCI
       
  3119   if (EnableJVMCI || UseAOT) {
       
  3120     _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset);
       
  3121     _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset);
       
  3122   }
       
  3123 #endif
       
  3124 }
       
  3125 
       
  3126 #ifdef COMPILER2
       
  3127 //------------------------------generate_uncommon_trap_blob--------------------
       
  3128 void SharedRuntime::generate_uncommon_trap_blob() {
       
  3129   // Allocate space for the code
       
  3130   ResourceMark rm;
       
  3131   // Setup code generation tools
       
  3132   CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
       
  3133   MacroAssembler* masm = new MacroAssembler(&buffer);
       
  3134 
       
  3135   assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
       
  3136 
       
  3137   address start = __ pc();
       
  3138 
       
  3139   if (UseRTMLocking) {
       
  3140     // Abort RTM transaction before possible nmethod deoptimization.
       
  3141     __ xabort(0);
       
  3142   }
       
  3143 
       
  3144   // Push self-frame.  We get here with a return address on the
       
  3145   // stack, so rsp is 8-byte aligned until we allocate our frame.
       
  3146   __ subptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Epilog!
       
  3147 
       
  3148   // No callee saved registers. rbp is assumed implicitly saved
       
  3149   __ movptr(Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt), rbp);
       
  3150 
       
   3151   // compiler left unloaded_class_index in j_rarg0; move to where the
       
  3152   // runtime expects it.
       
  3153   __ movl(c_rarg1, j_rarg0);
       
  3154 
       
  3155   __ set_last_Java_frame(noreg, noreg, NULL);
       
  3156 
       
  3157   // Call C code.  Need thread but NOT official VM entry
       
  3158   // crud.  We cannot block on this call, no GC can happen.  Call should
       
  3159   // capture callee-saved registers as well as return values.
       
  3160   // Thread is in rdi already.
       
  3161   //
       
  3162   // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index);
       
  3163 
       
  3164   __ mov(c_rarg0, r15_thread);
       
  3165   __ movl(c_rarg2, Deoptimization::Unpack_uncommon_trap);
       
  3166   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap)));
       
  3167 
       
  3168   // Set an oopmap for the call site
       
  3169   OopMapSet* oop_maps = new OopMapSet();
       
  3170   OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0);
       
  3171 
       
  3172   // location of rbp is known implicitly by the frame sender code
       
  3173 
       
  3174   oop_maps->add_gc_map(__ pc() - start, map);
       
  3175 
       
  3176   __ reset_last_Java_frame(false);
       
  3177 
       
  3178   // Load UnrollBlock* into rdi
       
  3179   __ mov(rdi, rax);
       
  3180 
       
  3181 #ifdef ASSERT
       
  3182   { Label L;
       
  3183     __ cmpptr(Address(rdi, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()),
       
  3184             (int32_t)Deoptimization::Unpack_uncommon_trap);
       
  3185     __ jcc(Assembler::equal, L);
       
  3186     __ stop("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap");
       
  3187     __ bind(L);
       
  3188   }
       
  3189 #endif
       
  3190 
       
  3191   // Pop all the frames we must move/replace.
       
  3192   //
       
  3193   // Frame picture (youngest to oldest)
       
  3194   // 1: self-frame (no frame link)
       
  3195   // 2: deopting frame  (no frame link)
       
  3196   // 3: caller of deopting frame (could be compiled/interpreted).
       
  3197 
       
  3198   // Pop self-frame.  We have no frame, and must rely only on rax and rsp.
       
  3199   __ addptr(rsp, (SimpleRuntimeFrame::framesize - 2) << LogBytesPerInt); // Epilog!
       
  3200 
       
  3201   // Pop deoptimized frame (int)
       
  3202   __ movl(rcx, Address(rdi,
       
  3203                        Deoptimization::UnrollBlock::
       
  3204                        size_of_deoptimized_frame_offset_in_bytes()));
       
  3205   __ addptr(rsp, rcx);
       
  3206 
       
  3207   // rsp should be pointing at the return address to the caller (3)
       
  3208 
       
  3209   // Pick up the initial fp we should save
       
  3210   // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved)
       
  3211   __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
       
  3212 
       
  3213 #ifdef ASSERT
       
   3214   // Compilers generate code that bangs the stack by as much as the
       
  3215   // interpreter would need. So this stack banging should never
       
  3216   // trigger a fault. Verify that it does not on non product builds.
       
  3217   if (UseStackBanging) {
       
  3218     __ movl(rbx, Address(rdi ,Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
       
  3219     __ bang_stack_size(rbx, rcx);
       
  3220   }
       
  3221 #endif
       
  3222 
       
  3223   // Load address of array of frame pcs into rcx (address*)
       
  3224   __ movptr(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
       
  3225 
       
  3226   // Trash the return pc
       
  3227   __ addptr(rsp, wordSize);
       
  3228 
       
  3229   // Load address of array of frame sizes into rsi (intptr_t*)
       
  3230   __ movptr(rsi, Address(rdi, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
       
  3231 
       
  3232   // Counter
       
  3233   __ movl(rdx, Address(rdi, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes())); // (int)
       
  3234 
       
  3235   // Now adjust the caller's stack to make up for the extra locals but
       
  3236   // record the original sp so that we can save it in the skeletal
       
  3237   // interpreter frame and the stack walking of interpreter_sender
       
  3238   // will get the unextended sp value and not the "real" sp value.
       
  3239 
       
  3240   const Register sender_sp = r8;
       
  3241 
       
  3242   __ mov(sender_sp, rsp);
       
  3243   __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes())); // (int)
       
  3244   __ subptr(rsp, rbx);
       
  3245 
       
  3246   // Push interpreter frames in a loop
       
  3247   Label loop;
       
  3248   __ bind(loop);
       
  3249   __ movptr(rbx, Address(rsi, 0)); // Load frame size
       
  3250   __ subptr(rbx, 2 * wordSize);    // We'll push pc and rbp by hand
       
  3251   __ pushptr(Address(rcx, 0));     // Save return address
       
  3252   __ enter();                      // Save old & set new rbp
       
  3253   __ subptr(rsp, rbx);             // Prolog
       
  3254   __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize),
       
  3255             sender_sp);            // Make it walkable
       
  3256   // This value is corrected by layout_activation_impl
       
  3257   __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD );
       
  3258   __ mov(sender_sp, rsp);          // Pass sender_sp to next frame
       
  3259   __ addptr(rsi, wordSize);        // Bump array pointer (sizes)
       
  3260   __ addptr(rcx, wordSize);        // Bump array pointer (pcs)
       
  3261   __ decrementl(rdx);              // Decrement counter
       
  3262   __ jcc(Assembler::notZero, loop);
       
  3263   __ pushptr(Address(rcx, 0));     // Save final return address
       
  3264 
       
  3265   // Re-push self-frame
       
  3266   __ enter();                 // Save old & set new rbp
       
  3267   __ subptr(rsp, (SimpleRuntimeFrame::framesize - 4) << LogBytesPerInt);
       
  3268                               // Prolog
       
  3269 
       
  3270   // Use rbp because the frames look interpreted now
       
  3271   // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP.
       
  3272   // Don't need the precise return PC here, just precise enough to point into this code blob.
       
  3273   address the_pc = __ pc();
       
  3274   __ set_last_Java_frame(noreg, rbp, the_pc);
       
  3275 
       
  3276   // Call C code.  Need thread but NOT official VM entry
       
  3277   // crud.  We cannot block on this call, no GC can happen.  Call should
       
  3278   // restore return values to their stack-slots with the new SP.
       
  3279   // Thread is in rdi already.
       
  3280   //
       
  3281   // BasicType unpack_frames(JavaThread* thread, int exec_mode);
       
  3282 
       
  3283   __ andptr(rsp, -(StackAlignmentInBytes)); // Align SP as required by ABI
       
  3284   __ mov(c_rarg0, r15_thread);
       
  3285   __ movl(c_rarg1, Deoptimization::Unpack_uncommon_trap);
       
  3286   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));
       
  3287 
       
  3288   // Set an oopmap for the call site
       
  3289   // Use the same PC we used for the last java frame
       
  3290   oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0));
       
  3291 
       
  3292   // Clear fp AND pc
       
  3293   __ reset_last_Java_frame(true);
       
  3294 
       
  3295   // Pop self-frame.
       
  3296   __ leave();                 // Epilog
       
  3297 
       
  3298   // Jump to interpreter
       
  3299   __ ret(0);
       
  3300 
       
  3301   // Make sure all code is generated
       
  3302   masm->flush();
       
  3303 
       
  3304   _uncommon_trap_blob =  UncommonTrapBlob::create(&buffer, oop_maps,
       
  3305                                                  SimpleRuntimeFrame::framesize >> 1);
       
  3306 }
       
  3307 #endif // COMPILER2
       
  3308 
       
  3309 
       
  3310 //------------------------------generate_handler_blob------
       
  3311 //
       
  3312 // Generate a special Compile2Runtime blob that saves all registers,
       
  3313 // and sets up an oopmap.
       
  3314 //
       
  3315 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
       
  3316   assert(StubRoutines::forward_exception_entry() != NULL,
       
  3317          "must be generated before");
       
  3318 
       
  3319   ResourceMark rm;
       
  3320   OopMapSet *oop_maps = new OopMapSet();
       
  3321   OopMap* map;
       
  3322 
       
  3323   // Allocate space for the code.  Setup code generation tools.
       
  3324   CodeBuffer buffer("handler_blob", 2048, 1024);
       
  3325   MacroAssembler* masm = new MacroAssembler(&buffer);
       
  3326 
       
  3327   address start   = __ pc();
       
  3328   address call_pc = NULL;
       
  3329   int frame_size_in_words;
       
  3330   bool cause_return = (poll_type == POLL_AT_RETURN);
       
  3331   bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP);
       
  3332 
       
  3333   if (UseRTMLocking) {
       
  3334     // Abort RTM transaction before calling runtime
       
  3335     // because critical section will be large and will be
       
  3336     // aborted anyway. Also nmethod could be deoptimized.
       
  3337     __ xabort(0);
       
  3338   }
       
  3339 
       
  3340   // Make room for return address (or push it again)
       
  3341   if (!cause_return) {
       
  3342     __ push(rbx);
       
  3343   }
       
  3344 
       
  3345   // Save registers, fpu state, and flags
       
  3346   map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, save_vectors);
       
  3347 
       
  3348   // The following is basically a call_VM.  However, we need the precise
       
  3349   // address of the call in order to generate an oopmap. Hence, we do all the
       
  3350   // work ourselves.
       
  3351 
       
  3352   __ set_last_Java_frame(noreg, noreg, NULL);
       
  3353 
       
  3354   // The return address must always be correct so that the frame constructor never
       
  3355   // sees an invalid pc.
       
  3356 
       
  3357   if (!cause_return) {
       
  3358     // overwrite the dummy value we pushed on entry
       
  3359     __ movptr(c_rarg0, Address(r15_thread, JavaThread::saved_exception_pc_offset()));
       
  3360     __ movptr(Address(rbp, wordSize), c_rarg0);
       
  3361   }
       
  3362 
       
  3363   // Do the call
       
  3364   __ mov(c_rarg0, r15_thread);
       
  3365   __ call(RuntimeAddress(call_ptr));
       
  3366 
       
  3367   // Set an oopmap for the call site.  This oopmap will map all
       
  3368   // oop-registers and debug-info registers as callee-saved.  This
       
  3369   // will allow deoptimization at this safepoint to find all possible
       
  3370   // debug-info recordings, as well as let GC find all oops.
       
  3371 
       
  3372   oop_maps->add_gc_map( __ pc() - start, map);
       
  3373 
       
  3374   Label noException;
       
  3375 
       
  3376   __ reset_last_Java_frame(false);
       
  3377 
       
  3378   __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
       
  3379   __ jcc(Assembler::equal, noException);
       
  3380 
       
  3381   // Exception pending
       
  3382 
       
  3383   RegisterSaver::restore_live_registers(masm, save_vectors);
       
  3384 
       
  3385   __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
       
  3386 
       
  3387   // No exception case
       
  3388   __ bind(noException);
       
  3389 
       
  3390   // Normal exit, restore registers and exit.
       
  3391   RegisterSaver::restore_live_registers(masm, save_vectors);
       
  3392 
       
  3393   __ ret(0);
       
  3394 
       
  3395   // Make sure all code is generated
       
  3396   masm->flush();
       
  3397 
       
  3398   // Fill-out other meta info
       
  3399   return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words);
       
  3400 }
       
  3401 
       
  3402 //
       
  3403 // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
       
  3404 //
       
  3405 // Generate a stub that calls into vm to find out the proper destination
       
  3406 // of a java call. All the argument registers are live at this point
       
  3407 // but since this is generic code we don't know what they are and the caller
       
  3408 // must do any gc of the args.
       
  3409 //
       
  3410 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
       
  3411   assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
       
  3412 
       
  3413   // allocate space for the code
       
  3414   ResourceMark rm;
       
  3415 
       
  3416   CodeBuffer buffer(name, 1000, 512);
       
  3417   MacroAssembler* masm                = new MacroAssembler(&buffer);
       
  3418 
       
  3419   int frame_size_in_words;
       
  3420 
       
  3421   OopMapSet *oop_maps = new OopMapSet();
       
  3422   OopMap* map = NULL;
       
  3423 
       
  3424   int start = __ offset();
       
  3425 
       
  3426   map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
       
  3427 
       
  3428   int frame_complete = __ offset();
       
  3429 
       
  3430   __ set_last_Java_frame(noreg, noreg, NULL);
       
  3431 
       
  3432   __ mov(c_rarg0, r15_thread);
       
  3433 
       
  3434   __ call(RuntimeAddress(destination));
       
  3435 
       
  3436 
       
  3437   // Set an oopmap for the call site.
       
  3438   // We need this not only for callee-saved registers, but also for volatile
       
  3439   // registers that the compiler might be keeping live across a safepoint.
       
  3440 
       
  3441   oop_maps->add_gc_map( __ offset() - start, map);
       
  3442 
       
  3443   // rax contains the address we are going to jump to assuming no exception got installed
       
  3444 
       
  3445   // clear last_Java_sp
       
  3446   __ reset_last_Java_frame(false);
       
  3447   // check for pending exceptions
       
  3448   Label pending;
       
  3449   __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
       
  3450   __ jcc(Assembler::notEqual, pending);
       
  3451 
       
  3452   // get the returned Method*
       
  3453   __ get_vm_result_2(rbx, r15_thread);
       
  3454   __ movptr(Address(rsp, RegisterSaver::rbx_offset_in_bytes()), rbx);
       
  3455 
       
  3456   __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax);
       
  3457 
       
  3458   RegisterSaver::restore_live_registers(masm);
       
  3459 
       
  3460   // We are back to the original state on entry and ready to go.
       
  3461 
       
  3462   __ jmp(rax);
       
  3463 
       
  3464   // Pending exception after the safepoint
       
  3465 
       
  3466   __ bind(pending);
       
  3467 
       
  3468   RegisterSaver::restore_live_registers(masm);
       
  3469 
       
  3470   // exception pending => remove activation and forward to exception handler
       
  3471 
       
  3472   __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), (int)NULL_WORD);
       
  3473 
       
  3474   __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
       
  3475   __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
       
  3476 
       
  3477   // -------------
       
  3478   // make sure all code is generated
       
  3479   masm->flush();
       
  3480 
       
  3481   // return the blob
       
  3482   // frame_size_words or bytes??
       
  3483   return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true);
       
  3484 }
       
  3485 
       
  3486 
       
  3487 //------------------------------Montgomery multiplication------------------------
       
  3488 //
       
  3489 
       
  3490 #ifndef _WINDOWS
       
  3491 
       
  3492 #define ASM_SUBTRACT
       
  3493 
       
  3494 #ifdef ASM_SUBTRACT
       
  3495 // Subtract 0:b from carry:a.  Return carry.
       
  3496 static unsigned long
       
  3497 sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
       
  3498   long i = 0, cnt = len;
       
  3499   unsigned long tmp;
       
  3500   asm volatile("clc; "
       
  3501                "0: ; "
       
  3502                "mov (%[b], %[i], 8), %[tmp]; "
       
  3503                "sbb %[tmp], (%[a], %[i], 8); "
       
  3504                "inc %[i]; dec %[cnt]; "
       
  3505                "jne 0b; "
       
  3506                "mov %[carry], %[tmp]; sbb $0, %[tmp]; "
       
  3507                : [i]"+r"(i), [cnt]"+r"(cnt), [tmp]"=&r"(tmp)
       
  3508                : [a]"r"(a), [b]"r"(b), [carry]"r"(carry)
       
  3509                : "memory");
       
  3510   return tmp;
       
  3511 }
       
  3512 #else // ASM_SUBTRACT
       
  3513 typedef int __attribute__((mode(TI))) int128;
       
  3514 
       
  3515 // Subtract 0:b from carry:a.  Return carry.
       
  3516 static unsigned long
       
  3517 sub(unsigned long a[], unsigned long b[], unsigned long carry, int len) {
       
  3518   int128 tmp = 0;
       
  3519   int i;
       
  3520   for (i = 0; i < len; i++) {
       
  3521     tmp += a[i];
       
  3522     tmp -= b[i];
       
  3523     a[i] = tmp;
       
  3524     tmp >>= 64;
       
  3525     assert(-1 <= tmp && tmp <= 0, "invariant");
       
  3526   }
       
  3527   return tmp + carry;
       
  3528 }
       
  3529 #endif // ! ASM_SUBTRACT
       
  3530 
       
  3531 // Multiply (unsigned) Long A by Long B, accumulating the double-
       
  3532 // length result into the accumulator formed of T0, T1, and T2.
       
  3533 #define MACC(A, B, T0, T1, T2)                                  \
       
  3534 do {                                                            \
       
  3535   unsigned long hi, lo;                                         \
       
  3536   __asm__ ("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4"   \
       
  3537            : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2)  \
       
  3538            : "r"(A), "a"(B) : "cc");                            \
       
  3539  } while(0)
       
  3540 
       
  3541 // As above, but add twice the double-length result into the
       
  3542 // accumulator.
       
  3543 #define MACC2(A, B, T0, T1, T2)                                 \
       
  3544 do {                                                            \
       
  3545   unsigned long hi, lo;                                         \
       
  3546   __asm__ ("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4; " \
       
  3547            "add %%rax, %2; adc %%rdx, %3; adc $0, %4"           \
       
  3548            : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2)  \
       
  3549            : "r"(A), "a"(B) : "cc");                            \
       
  3550  } while(0)
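       // Conceptually (illustrative only), with a 128-bit intermediate product
       //   p = (unsigned __int128)(A) * (B),
       // MACC adds p once into the 192-bit accumulator T2:T1:T0, while MACC2
       // adds 2*p -- hence the add/adc sequence is issued twice.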
       
  3551 
       
  3552 // Fast Montgomery multiplication.  The derivation of the algorithm is
       
  3553 // in A Cryptographic Library for the Motorola DSP56000,
       
  3554 // Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237.
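       // A note on the key step in the loops below: at column i, m[i] = t0 * inv
       // is the unique 64-bit digit with t0 + m[i]*n[0] == 0 (mod 2^64), because
       // inv == -n[0]^-1 (mod 2^64) (see the assert on inv below). The running
       // sum therefore stays divisible by 2^64, its low word can be dropped each
       // round (t0 = t1; t1 = t2), and assert(t0 == 0, ...) checks exactly this.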
       
  3555 
       
  3556 static void __attribute__((noinline))
       
  3557 montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
       
  3558                     unsigned long m[], unsigned long inv, int len) {
       
  3559   unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
       
  3560   int i;
       
  3561 
       
  3562   assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
       
  3563 
       
  3564   for (i = 0; i < len; i++) {
       
  3565     int j;
       
  3566     for (j = 0; j < i; j++) {
       
  3567       MACC(a[j], b[i-j], t0, t1, t2);
       
  3568       MACC(m[j], n[i-j], t0, t1, t2);
       
  3569     }
       
  3570     MACC(a[i], b[0], t0, t1, t2);
       
  3571     m[i] = t0 * inv;
       
  3572     MACC(m[i], n[0], t0, t1, t2);
       
  3573 
       
  3574     assert(t0 == 0, "broken Montgomery multiply");
       
  3575 
       
  3576     t0 = t1; t1 = t2; t2 = 0;
       
  3577   }
       
  3578 
       
  3579   for (i = len; i < 2*len; i++) {
       
  3580     int j;
       
  3581     for (j = i-len+1; j < len; j++) {
       
  3582       MACC(a[j], b[i-j], t0, t1, t2);
       
  3583       MACC(m[j], n[i-j], t0, t1, t2);
       
  3584     }
       
  3585     m[i-len] = t0;
       
  3586     t0 = t1; t1 = t2; t2 = 0;
       
  3587   }
       
  3588 
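         // Final reduction: while a carry word remains, subtract n from carry:m
         // so that the result fits in len words.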
       
  3589   while (t0)
       
  3590     t0 = sub(m, n, t0, len);
       
  3591 }
       
  3592 
       
  3593 // Fast Montgomery squaring.  This uses asymptotically 25% fewer
       
  3594 // multiplies so it should be up to 25% faster than Montgomery
       
  3595 // multiplication.  However, its loop control is more complex and it
       
  3596 // may actually run slower on some machines.
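       // The saving comes from symmetry: each cross product a[j]*a[i-j] (j != i-j)
       // is computed once and added twice via MACC2; the single middle square term
       // a[j]*a[j] is added once when i is even.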
       
  3597 
       
  3598 static void __attribute__((noinline))
       
  3599 montgomery_square(unsigned long a[], unsigned long n[],
       
  3600                   unsigned long m[], unsigned long inv, int len) {
       
  3601   unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
       
  3602   int i;
       
  3603 
       
  3604   assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
       
  3605 
       
  3606   for (i = 0; i < len; i++) {
       
  3607     int j;
       
  3608     int end = (i+1)/2;
       
  3609     for (j = 0; j < end; j++) {
       
  3610       MACC2(a[j], a[i-j], t0, t1, t2);
       
  3611       MACC(m[j], n[i-j], t0, t1, t2);
       
  3612     }
       
  3613     if ((i & 1) == 0) {
       
  3614       MACC(a[j], a[j], t0, t1, t2);
       
  3615     }
       
  3616     for (; j < i; j++) {
       
  3617       MACC(m[j], n[i-j], t0, t1, t2);
       
  3618     }
       
  3619     m[i] = t0 * inv;
       
  3620     MACC(m[i], n[0], t0, t1, t2);
       
  3621 
       
  3622     assert(t0 == 0, "broken Montgomery square");
       
  3623 
       
  3624     t0 = t1; t1 = t2; t2 = 0;
       
  3625   }
       
  3626 
       
  3627   for (i = len; i < 2*len; i++) {
       
  3628     int start = i-len+1;
       
  3629     int end = start + (len - start)/2;
       
  3630     int j;
       
  3631     for (j = start; j < end; j++) {
       
  3632       MACC2(a[j], a[i-j], t0, t1, t2);
       
  3633       MACC(m[j], n[i-j], t0, t1, t2);
       
  3634     }
       
  3635     if ((i & 1) == 0) {
       
  3636       MACC(a[j], a[j], t0, t1, t2);
       
  3637     }
       
  3638     for (; j < len; j++) {
       
  3639       MACC(m[j], n[i-j], t0, t1, t2);
       
  3640     }
       
  3641     m[i-len] = t0;
       
  3642     t0 = t1; t1 = t2; t2 = 0;
       
  3643   }
       
  3644 
       
  3645   while (t0)
       
  3646     t0 = sub(m, n, t0, len);
       
  3647 }
       
  3648 
       
  3649 // Swap words in a longword.
       
  3650 static unsigned long swap(unsigned long x) {
       
  3651   return (x << 32) | (x >> 32);
       
  3652 }
       
  3653 
       
  3654 // Copy len longwords from s to d, word-swapping as we go.  The
       
  3655 // destination array is reversed.
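       // Together with swap() this converts between the jint-array layout the
       // intrinsic receives from the Java side and the little-endian array of
       // 64-bit words that the routines above index from the least-significant
       // word upwards (and converts the result back again afterwards).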
       
  3656 static void reverse_words(unsigned long *s, unsigned long *d, int len) {
       
  3657   d += len;
       
  3658   while(len-- > 0) {
       
  3659     d--;
       
  3660     *d = swap(*s);
       
  3661     s++;
       
  3662   }
       
  3663 }
       
  3664 
       
  3665 // The threshold at which squaring is advantageous was determined
       
  3666 // experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz.
       
  3667 #define MONTGOMERY_SQUARING_THRESHOLD 64
       
  3668 
       
  3669 void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
       
  3670                                         jint len, jlong inv,
       
  3671                                         jint *m_ints) {
       
  3672   assert(len % 2 == 0, "array length in montgomery_multiply must be even");
       
  3673   int longwords = len/2;
       
  3674 
       
  3675   // Make very sure we don't use so much space that the stack might
       
  3676   // overflow.  512 jints correspond to a 16384-bit integer and
       
  3677   // will use here a total of 8k bytes of stack space.
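         // (For example, len == 512 jints gives longwords == 256, so the four
         //  scratch arrays below take 4 * 256 * 8 == 8192 bytes.)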
       
  3678   int total_allocation = longwords * sizeof (unsigned long) * 4;
       
  3679   guarantee(total_allocation <= 8192, "must be");
       
  3680   unsigned long *scratch = (unsigned long *)alloca(total_allocation);
       
  3681 
       
  3682   // Local scratch arrays
       
  3683   unsigned long
       
  3684     *a = scratch + 0 * longwords,
       
  3685     *b = scratch + 1 * longwords,
       
  3686     *n = scratch + 2 * longwords,
       
  3687     *m = scratch + 3 * longwords;
       
  3688 
       
  3689   reverse_words((unsigned long *)a_ints, a, longwords);
       
  3690   reverse_words((unsigned long *)b_ints, b, longwords);
       
  3691   reverse_words((unsigned long *)n_ints, n, longwords);
       
  3692 
       
  3693   ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);
       
  3694 
       
  3695   reverse_words(m, (unsigned long *)m_ints, longwords);
       
  3696 }
       
  3697 
       
  3698 void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
       
  3699                                       jint len, jlong inv,
       
  3700                                       jint *m_ints) {
       
  3701   assert(len % 2 == 0, "array length in montgomery_square must be even");
       
  3702   int longwords = len/2;
       
  3703 
       
  3704   // Make very sure we don't use so much space that the stack might
       
  3705   // overflow.  512 jints correspond to a 16384-bit integer and
       
  3706   // will use here a total of 6k bytes of stack space.
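         // (For example, len == 512 jints gives longwords == 256, so the three
         //  scratch arrays below take 3 * 256 * 8 == 6144 bytes.)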
       
  3707   int total_allocation = longwords * sizeof (unsigned long) * 3;
       
  3708   guarantee(total_allocation <= 8192, "must be");
       
  3709   unsigned long *scratch = (unsigned long *)alloca(total_allocation);
       
  3710 
       
  3711   // Local scratch arrays
       
  3712   unsigned long
       
  3713     *a = scratch + 0 * longwords,
       
  3714     *n = scratch + 1 * longwords,
       
  3715     *m = scratch + 2 * longwords;
       
  3716 
       
  3717   reverse_words((unsigned long *)a_ints, a, longwords);
       
  3718   reverse_words((unsigned long *)n_ints, n, longwords);
       
  3719 
       
  3720   if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
       
  3721     ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
       
  3722   } else {
       
  3723     ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
       
  3724   }
       
  3725 
       
  3726   reverse_words(m, (unsigned long *)m_ints, longwords);
       
  3727 }
       
  3728 
       
  3729 #endif // !_WINDOWS
       
  3730 
       
  3731 #ifdef COMPILER2
       
  3732 // This is here instead of runtime_x86_64.cpp because it uses SimpleRuntimeFrame
       
  3733 //
       
  3734 //------------------------------generate_exception_blob---------------------------
       
  3735 // creates exception blob at the end
       
  3736 // Using the exception blob, this code is jumped to from a compiled method.
       
  3737 // (see emit_exception_handler in x86_64.ad file)
       
  3738 //
       
  3739 // Given an exception pc at a call we call into the runtime for the
       
  3740 // handler in this method. This handler might merely restore state
       
  3741 // (i.e. callee-save registers), unwind the frame, and jump to the
       
  3742 // exception handler of the caller if there is no Java-level handler
       
  3743 // for the nmethod.
       
  3744 //
       
  3745 // This code is entered with a jmp.
       
  3746 //
       
  3747 // Arguments:
       
  3748 //   rax: exception oop
       
  3749 //   rdx: exception pc
       
  3750 //
       
  3751 // Results:
       
  3752 //   rax: exception oop
       
  3753 //   rdx: exception pc in caller or ???
       
  3754 //   destination: exception handler of caller
       
  3755 //
       
  3756 // Note: the exception pc MUST be at a call (precise debug information)
       
  3757 //       Registers rax, rdx, rcx, rsi, rdi, r8-r11 are not callee saved.
       
  3758 //
       
  3759 
       
  3760 void OptoRuntime::generate_exception_blob() {
       
  3761   assert(!OptoRuntime::is_callee_saved_register(RDX_num), "");
       
  3762   assert(!OptoRuntime::is_callee_saved_register(RAX_num), "");
       
  3763   assert(!OptoRuntime::is_callee_saved_register(RCX_num), "");
       
  3764 
       
  3765   assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
       
  3766 
       
  3767   // Allocate space for the code
       
  3768   ResourceMark rm;
       
  3769   // Setup code generation tools
       
  3770   CodeBuffer buffer("exception_blob", 2048, 1024);
       
  3771   MacroAssembler* masm = new MacroAssembler(&buffer);
       
  3772 
       
  3773 
       
  3774   address start = __ pc();
       
  3775 
       
  3776   // Exception pc is 'return address' for stack walker
       
  3777   __ push(rdx);
       
  3778   __ subptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Prolog
       
  3779 
       
  3780   // Save callee-saved registers.  See x86_64.ad.
       
  3781 
       
  3782   // rbp is an implicitly saved callee-saved register (i.e., the calling
       
  3783   // convention will save/restore it in the prolog/epilog). Other than that
       
  3784   // there are no callee save registers now that adapter frames are gone.
       
  3785 
       
  3786   __ movptr(Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt), rbp);
       
  3787 
       
  3788   // Store exception in Thread object. We cannot pass any arguments to the
       
  3789   // handle_exception call, since we do not want to make any assumption
       
  3790   // about the size of the frame where the exception happened in.
       
  3791   // c_rarg0 is either rdi (Linux) or rcx (Windows).
       
  3792   __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()),rax);
       
  3793   __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), rdx);
       
  3794 
       
  3795   // This call does all the hard work.  It checks if an exception handler
       
  3796   // exists in the method.
       
  3797   // If so, it returns the handler address.
       
  3798   // If not, it prepares for stack-unwinding, restoring the callee-save
       
  3799   // registers of the frame being removed.
       
  3800   //
       
  3801   // address OptoRuntime::handle_exception_C(JavaThread* thread)
       
  3802 
       
  3803   // At a method handle call, the stack may not be properly aligned
       
  3804   // when returning with an exception.
       
  3805   address the_pc = __ pc();
       
  3806   __ set_last_Java_frame(noreg, noreg, the_pc);
       
  3807   __ mov(c_rarg0, r15_thread);
       
  3808   __ andptr(rsp, -(StackAlignmentInBytes));    // Align stack
       
  3809   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)));
       
  3810 
       
  3811   // Set an oopmap for the call site.  This oopmap will only be used if we
       
  3812   // are unwinding the stack.  Hence, all locations will be dead.
       
  3813   // Callee-saved registers will be the same as the frame above (i.e.,
       
  3814   // handle_exception_stub), since they were restored when we got the
       
  3815   // exception.
       
  3816 
       
  3817   OopMapSet* oop_maps = new OopMapSet();
       
  3818 
       
  3819   oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0));
       
  3820 
       
  3821   __ reset_last_Java_frame(false);
       
  3822 
       
  3823   // Restore callee-saved registers
       
  3824 
       
  3825   // rbp is an implicitly saved callee-saved register (i.e., the calling
       
  3826   // convention will save/restore it in the prolog/epilog). Other than that
       
  3827   // there are no callee save registers now that adapter frames are gone.
       
  3828 
       
  3829   __ movptr(rbp, Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt));
       
  3830 
       
  3831   __ addptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Epilog
       
  3832   __ pop(rdx);                  // No need for exception pc anymore
       
  3833 
       
  3834   // rax: exception handler
       
  3835 
       
  3836   // We have a handler in rax (could be deopt blob).
       
  3837   __ mov(r8, rax);
       
  3838 
       
  3839   // Get the exception oop
       
  3840   __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset()));
       
  3841   // Get the exception pc in case we are deoptimized
       
  3842   __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset()));
       
  3843 #ifdef ASSERT
       
  3844   __ movptr(Address(r15_thread, JavaThread::exception_handler_pc_offset()), (int)NULL_WORD);
       
  3845   __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int)NULL_WORD);
       
  3846 #endif
       
  3847   // Clear the exception oop so GC no longer processes it as a root.
       
  3848   __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), (int)NULL_WORD);
       
  3849 
       
  3850   // rax: exception oop
       
  3851   // r8:  exception handler
       
  3852   // rdx: exception pc
       
  3853   // Jump to handler
       
  3854 
       
  3855   __ jmp(r8);
       
  3856 
       
  3857   // Make sure all code is generated
       
  3858   masm->flush();
       
  3859 
       
  3860   // Set exception blob
       
  3861   _exception_blob =  ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1);
       
  3862 }
       
  3863 #endif // COMPILER2