hotspot/src/cpu/x86/vm/sharedRuntime_x86_32.cpp
changeset 13883 6979b9850feb
parent 13881 a326d528f3e1
child 14391 df0a1573d5bd
equal deleted inserted replaced
13882:80d5d0d21b75 13883:6979b9850feb
    44 #define __ masm->
    44 #define __ masm->
    45 
    45 
    46 const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
    46 const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
    47 
    47 
    48 class RegisterSaver {
    48 class RegisterSaver {
    49   enum { FPU_regs_live = 8 /*for the FPU stack*/+8/*eight more for XMM registers*/ };
       
    50   // Capture info about frame layout
    49   // Capture info about frame layout
       
    50 #define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
    51   enum layout {
    51   enum layout {
    52                 fpu_state_off = 0,
    52                 fpu_state_off = 0,
    53                 fpu_state_end = fpu_state_off+FPUStateSizeInWords-1,
    53                 fpu_state_end = fpu_state_off+FPUStateSizeInWords,
    54                 st0_off, st0H_off,
    54                 st0_off, st0H_off,
    55                 st1_off, st1H_off,
    55                 st1_off, st1H_off,
    56                 st2_off, st2H_off,
    56                 st2_off, st2H_off,
    57                 st3_off, st3H_off,
    57                 st3_off, st3H_off,
    58                 st4_off, st4H_off,
    58                 st4_off, st4H_off,
    59                 st5_off, st5H_off,
    59                 st5_off, st5H_off,
    60                 st6_off, st6H_off,
    60                 st6_off, st6H_off,
    61                 st7_off, st7H_off,
    61                 st7_off, st7H_off,
    62 
    62                 xmm_off,
    63                 xmm0_off, xmm0H_off,
    63                 DEF_XMM_OFFS(0),
    64                 xmm1_off, xmm1H_off,
    64                 DEF_XMM_OFFS(1),
    65                 xmm2_off, xmm2H_off,
    65                 DEF_XMM_OFFS(2),
    66                 xmm3_off, xmm3H_off,
    66                 DEF_XMM_OFFS(3),
    67                 xmm4_off, xmm4H_off,
    67                 DEF_XMM_OFFS(4),
    68                 xmm5_off, xmm5H_off,
    68                 DEF_XMM_OFFS(5),
    69                 xmm6_off, xmm6H_off,
    69                 DEF_XMM_OFFS(6),
    70                 xmm7_off, xmm7H_off,
    70                 DEF_XMM_OFFS(7),
    71                 flags_off,
    71                 flags_off = xmm7_off + 16/BytesPerInt + 1, // 16-byte stack alignment fill word
    72                 rdi_off,
    72                 rdi_off,
    73                 rsi_off,
    73                 rsi_off,
    74                 ignore_off,  // extra copy of rbp,
    74                 ignore_off,  // extra copy of rbp,
    75                 rsp_off,
    75                 rsp_off,
    76                 rbx_off,
    76                 rbx_off,
    81                 // will override any oopMap setting for it. We must therefore force the layout
    81                 // will override any oopMap setting for it. We must therefore force the layout
    82                 // so that it agrees with the frame sender code.
    82                 // so that it agrees with the frame sender code.
    83                 rbp_off,
    83                 rbp_off,
    84                 return_off,      // slot for return address
    84                 return_off,      // slot for return address
    85                 reg_save_size };
    85                 reg_save_size };
    86 
    86   enum { FPU_regs_live = flags_off - fpu_state_end };
    87 
    87 
    88   public:
    88   public:
    89 
    89 
    90   static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words,
    90   static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words,
    91                                      int* total_frame_words, bool verify_fpu = true);
    91                                      int* total_frame_words, bool verify_fpu = true, bool save_vectors = false);
    92   static void restore_live_registers(MacroAssembler* masm);
    92   static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);
    93 
    93 
    94   static int rax_offset() { return rax_off; }
    94   static int rax_offset() { return rax_off; }
    95   static int rbx_offset() { return rbx_off; }
    95   static int rbx_offset() { return rbx_off; }
    96 
    96 
    97   // Offsets into the register save area
    97   // Offsets into the register save area
   111   static void restore_result_registers(MacroAssembler* masm);
   111   static void restore_result_registers(MacroAssembler* masm);
   112 
   112 
   113 };
   113 };
   114 
   114 
   115 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words,
   115 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words,
   116                                            int* total_frame_words, bool verify_fpu) {
   116                                            int* total_frame_words, bool verify_fpu, bool save_vectors) {
   117 
   117   int vect_words = 0;
   118   int frame_size_in_bytes =  (reg_save_size + additional_frame_words) * wordSize;
   118 #ifdef COMPILER2
       
   119   if (save_vectors) {
       
   120     assert(UseAVX > 0, "256bit vectors are supported only with AVX");
       
   121     assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
       
   122     // Save upper half of YMM registers
       
   123     vect_words = 8 * 16 / wordSize;
       
   124     additional_frame_words += vect_words;
       
   125   }
       
   126 #else
       
   127   assert(!save_vectors, "vectors are generated only by C2");
       
   128 #endif
       
   129   int frame_size_in_bytes = (reg_save_size + additional_frame_words) * wordSize;
   119   int frame_words = frame_size_in_bytes / wordSize;
   130   int frame_words = frame_size_in_bytes / wordSize;
   120   *total_frame_words = frame_words;
   131   *total_frame_words = frame_words;
   121 
   132 
   122   assert(FPUStateSizeInWords == 27, "update stack layout");
   133   assert(FPUStateSizeInWords == 27, "update stack layout");
   123 
   134 
   127   // to be under the return like a normal enter and we want to use pusha
   138   // to be under the return like a normal enter and we want to use pusha
   128   // We push by hand instead of using push
   139   // We push by hand instead of using push
   129   __ enter();
   140   __ enter();
   130   __ pusha();
   141   __ pusha();
   131   __ pushf();
   142   __ pushf();
   132   __ subptr(rsp,FPU_regs_live*sizeof(jdouble)); // Push FPU registers space
   143   __ subptr(rsp,FPU_regs_live*wordSize); // Push FPU registers space
   133   __ push_FPU_state();          // Save FPU state & init
   144   __ push_FPU_state();          // Save FPU state & init
   134 
   145 
   135   if (verify_fpu) {
   146   if (verify_fpu) {
   136     // Some stubs may have non standard FPU control word settings so
   147     // Some stubs may have non standard FPU control word settings so
   137     // only check and reset the value when it is required to be the
   148     // only check and reset the value when it is required to be the
   181     __ movflt(Address(rsp,xmm4_off*wordSize),xmm4);
   192     __ movflt(Address(rsp,xmm4_off*wordSize),xmm4);
   182     __ movflt(Address(rsp,xmm5_off*wordSize),xmm5);
   193     __ movflt(Address(rsp,xmm5_off*wordSize),xmm5);
   183     __ movflt(Address(rsp,xmm6_off*wordSize),xmm6);
   194     __ movflt(Address(rsp,xmm6_off*wordSize),xmm6);
   184     __ movflt(Address(rsp,xmm7_off*wordSize),xmm7);
   195     __ movflt(Address(rsp,xmm7_off*wordSize),xmm7);
   185   } else if( UseSSE >= 2 ) {
   196   } else if( UseSSE >= 2 ) {
   186     __ movdbl(Address(rsp,xmm0_off*wordSize),xmm0);
   197     // Save whole 128bit (16 bytes) XMM registers
   187     __ movdbl(Address(rsp,xmm1_off*wordSize),xmm1);
   198     __ movdqu(Address(rsp,xmm0_off*wordSize),xmm0);
   188     __ movdbl(Address(rsp,xmm2_off*wordSize),xmm2);
   199     __ movdqu(Address(rsp,xmm1_off*wordSize),xmm1);
   189     __ movdbl(Address(rsp,xmm3_off*wordSize),xmm3);
   200     __ movdqu(Address(rsp,xmm2_off*wordSize),xmm2);
   190     __ movdbl(Address(rsp,xmm4_off*wordSize),xmm4);
   201     __ movdqu(Address(rsp,xmm3_off*wordSize),xmm3);
   191     __ movdbl(Address(rsp,xmm5_off*wordSize),xmm5);
   202     __ movdqu(Address(rsp,xmm4_off*wordSize),xmm4);
   192     __ movdbl(Address(rsp,xmm6_off*wordSize),xmm6);
   203     __ movdqu(Address(rsp,xmm5_off*wordSize),xmm5);
   193     __ movdbl(Address(rsp,xmm7_off*wordSize),xmm7);
   204     __ movdqu(Address(rsp,xmm6_off*wordSize),xmm6);
       
   205     __ movdqu(Address(rsp,xmm7_off*wordSize),xmm7);
       
   206   }
       
   207 
       
   208   if (vect_words > 0) {
       
   209     assert(vect_words*wordSize == 128, "");
       
   210     __ subptr(rsp, 128); // Save upper half of YMM registers
       
   211     __ vextractf128h(Address(rsp,  0),xmm0);
       
   212     __ vextractf128h(Address(rsp, 16),xmm1);
       
   213     __ vextractf128h(Address(rsp, 32),xmm2);
       
   214     __ vextractf128h(Address(rsp, 48),xmm3);
       
   215     __ vextractf128h(Address(rsp, 64),xmm4);
       
   216     __ vextractf128h(Address(rsp, 80),xmm5);
       
   217     __ vextractf128h(Address(rsp, 96),xmm6);
       
   218     __ vextractf128h(Address(rsp,112),xmm7);
   194   }
   219   }
   195 
   220 
   196   // Set an oopmap for the call site.  This oopmap will map all
   221   // Set an oopmap for the call site.  This oopmap will map all
   197   // oop-registers and debug-info registers as callee-saved.  This
   222   // oop-registers and debug-info registers as callee-saved.  This
   198   // will allow deoptimization at this safepoint to find all possible
   223   // will allow deoptimization at this safepoint to find all possible
   251 
   276 
   252   return map;
   277   return map;
   253 
   278 
   254 }
   279 }
   255 
   280 
   256 void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
   281 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
   257 
       
   258   // Recover XMM & FPU state
   282   // Recover XMM & FPU state
   259   if( UseSSE == 1 ) {
   283   int additional_frame_bytes = 0;
       
   284 #ifdef COMPILER2
       
   285   if (restore_vectors) {
       
   286     assert(UseAVX > 0, "256bit vectors are supported only with AVX");
       
   287     assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
       
   288     additional_frame_bytes = 128;
       
   289   }
       
   290 #else
       
   291   assert(!restore_vectors, "vectors are generated only by C2");
       
   292 #endif
       
   293   if (UseSSE == 1) {
       
   294     assert(additional_frame_bytes == 0, "");
   260     __ movflt(xmm0,Address(rsp,xmm0_off*wordSize));
   295     __ movflt(xmm0,Address(rsp,xmm0_off*wordSize));
   261     __ movflt(xmm1,Address(rsp,xmm1_off*wordSize));
   296     __ movflt(xmm1,Address(rsp,xmm1_off*wordSize));
   262     __ movflt(xmm2,Address(rsp,xmm2_off*wordSize));
   297     __ movflt(xmm2,Address(rsp,xmm2_off*wordSize));
   263     __ movflt(xmm3,Address(rsp,xmm3_off*wordSize));
   298     __ movflt(xmm3,Address(rsp,xmm3_off*wordSize));
   264     __ movflt(xmm4,Address(rsp,xmm4_off*wordSize));
   299     __ movflt(xmm4,Address(rsp,xmm4_off*wordSize));
   265     __ movflt(xmm5,Address(rsp,xmm5_off*wordSize));
   300     __ movflt(xmm5,Address(rsp,xmm5_off*wordSize));
   266     __ movflt(xmm6,Address(rsp,xmm6_off*wordSize));
   301     __ movflt(xmm6,Address(rsp,xmm6_off*wordSize));
   267     __ movflt(xmm7,Address(rsp,xmm7_off*wordSize));
   302     __ movflt(xmm7,Address(rsp,xmm7_off*wordSize));
   268   } else if( UseSSE >= 2 ) {
   303   } else if (UseSSE >= 2) {
   269     __ movdbl(xmm0,Address(rsp,xmm0_off*wordSize));
   304 #define STACK_ADDRESS(x) Address(rsp,(x)*wordSize + additional_frame_bytes)
   270     __ movdbl(xmm1,Address(rsp,xmm1_off*wordSize));
   305     __ movdqu(xmm0,STACK_ADDRESS(xmm0_off));
   271     __ movdbl(xmm2,Address(rsp,xmm2_off*wordSize));
   306     __ movdqu(xmm1,STACK_ADDRESS(xmm1_off));
   272     __ movdbl(xmm3,Address(rsp,xmm3_off*wordSize));
   307     __ movdqu(xmm2,STACK_ADDRESS(xmm2_off));
   273     __ movdbl(xmm4,Address(rsp,xmm4_off*wordSize));
   308     __ movdqu(xmm3,STACK_ADDRESS(xmm3_off));
   274     __ movdbl(xmm5,Address(rsp,xmm5_off*wordSize));
   309     __ movdqu(xmm4,STACK_ADDRESS(xmm4_off));
   275     __ movdbl(xmm6,Address(rsp,xmm6_off*wordSize));
   310     __ movdqu(xmm5,STACK_ADDRESS(xmm5_off));
   276     __ movdbl(xmm7,Address(rsp,xmm7_off*wordSize));
   311     __ movdqu(xmm6,STACK_ADDRESS(xmm6_off));
       
   312     __ movdqu(xmm7,STACK_ADDRESS(xmm7_off));
       
   313 #undef STACK_ADDRESS
       
   314   }
       
   315   if (restore_vectors) {
       
   316     // Restore upper half of YMM registers.
       
   317     assert(additional_frame_bytes == 128, "");
       
   318     __ vinsertf128h(xmm0, Address(rsp,  0));
       
   319     __ vinsertf128h(xmm1, Address(rsp, 16));
       
   320     __ vinsertf128h(xmm2, Address(rsp, 32));
       
   321     __ vinsertf128h(xmm3, Address(rsp, 48));
       
   322     __ vinsertf128h(xmm4, Address(rsp, 64));
       
   323     __ vinsertf128h(xmm5, Address(rsp, 80));
       
   324     __ vinsertf128h(xmm6, Address(rsp, 96));
       
   325     __ vinsertf128h(xmm7, Address(rsp,112));
       
   326     __ addptr(rsp, additional_frame_bytes);
   277   }
   327   }
   278   __ pop_FPU_state();
   328   __ pop_FPU_state();
   279   __ addptr(rsp, FPU_regs_live*sizeof(jdouble)); // Pop FPU registers
   329   __ addptr(rsp, FPU_regs_live*wordSize); // Pop FPU registers
   280 
   330 
   281   __ popf();
   331   __ popf();
   282   __ popa();
   332   __ popa();
   283   // Get the rbp, described implicitly by the frame sender code (no oopMap)
   333   // Get the rbp, described implicitly by the frame sender code (no oopMap)
   284   __ pop(rbp);
   334   __ pop(rbp);
   304   }
   354   }
   305   __ movptr(rax, Address(rsp, rax_off*wordSize));
   355   __ movptr(rax, Address(rsp, rax_off*wordSize));
   306   __ movptr(rdx, Address(rsp, rdx_off*wordSize));
   356   __ movptr(rdx, Address(rsp, rdx_off*wordSize));
   307   // Pop all of the register save area off the stack except the return address
   357   // Pop all of the register save area off the stack except the return address
   308   __ addptr(rsp, return_off * wordSize);
   358   __ addptr(rsp, return_off * wordSize);
       
   359 }
       
   360 
       
   361 // Is vector's size (in bytes) bigger than a size saved by default?
       
   362 // 16 bytes XMM registers are saved by default using SSE2 movdqu instructions.
       
   363 // Note, MaxVectorSize == 0 with UseSSE < 2 and vectors are not generated.
       
   364 bool SharedRuntime::is_wide_vector(int size) {
       
   365   return size > 16;
   309 }
   366 }
   310 
   367 
   311 // The java_calling_convention describes stack locations as ideal slots on
   368 // The java_calling_convention describes stack locations as ideal slots on
   312 // a frame with no abi restrictions. Since we must observe abi restrictions
   369 // a frame with no abi restrictions. Since we must observe abi restrictions
   313 // (like the placement of the register window) the slots must be biased by
   370 // (like the placement of the register window) the slots must be biased by
  2730 
  2787 
  2731 uint SharedRuntime::out_preserve_stack_slots() {
  2788 uint SharedRuntime::out_preserve_stack_slots() {
  2732   return 0;
  2789   return 0;
  2733 }
  2790 }
  2734 
  2791 
  2735 
       
  2736 //------------------------------generate_deopt_blob----------------------------
  2792 //------------------------------generate_deopt_blob----------------------------
  2737 void SharedRuntime::generate_deopt_blob() {
  2793 void SharedRuntime::generate_deopt_blob() {
  2738   // allocate space for the code
  2794   // allocate space for the code
  2739   ResourceMark rm;
  2795   ResourceMark rm;
  2740   // setup code generation tools
  2796   // setup code generation tools
  3268 //
  3324 //
  3269 // Generate a special Compile2Runtime blob that saves all registers,
  3325 // Generate a special Compile2Runtime blob that saves all registers,
  3270 // setup oopmap, and calls safepoint code to stop the compiled code for
  3326 // setup oopmap, and calls safepoint code to stop the compiled code for
  3271 // a safepoint.
  3327 // a safepoint.
  3272 //
  3328 //
  3273 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause_return) {
  3329 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
  3274 
  3330 
  3275   // Account for thread arg in our frame
  3331   // Account for thread arg in our frame
  3276   const int additional_words = 1;
  3332   const int additional_words = 1;
  3277   int frame_size_in_words;
  3333   int frame_size_in_words;
  3278 
  3334 
  3288   MacroAssembler* masm = new MacroAssembler(&buffer);
  3344   MacroAssembler* masm = new MacroAssembler(&buffer);
  3289 
  3345 
  3290   const Register java_thread = rdi; // callee-saved for VC++
  3346   const Register java_thread = rdi; // callee-saved for VC++
  3291   address start   = __ pc();
  3347   address start   = __ pc();
  3292   address call_pc = NULL;
  3348   address call_pc = NULL;
  3293 
  3349   bool cause_return = (poll_type == POLL_AT_RETURN);
       
  3350   bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP);
  3294   // If cause_return is true we are at a poll_return and there is
  3351   // If cause_return is true we are at a poll_return and there is
  3295   // the return address on the stack to the caller on the nmethod
  3352   // the return address on the stack to the caller on the nmethod
  3296   // that is safepoint. We can leave this return on the stack and
  3353   // that is safepoint. We can leave this return on the stack and
  3297   // effectively complete the return and safepoint in the caller.
  3354   // effectively complete the return and safepoint in the caller.
  3298   // Otherwise we push space for a return address that the safepoint
  3355   // Otherwise we push space for a return address that the safepoint
  3299   // handler will install later to make the stack walking sensible.
  3356   // handler will install later to make the stack walking sensible.
  3300   if( !cause_return )
  3357   if (!cause_return)
  3301     __ push(rbx);                // Make room for return address (or push it again)
  3358     __ push(rbx);  // Make room for return address (or push it again)
  3302 
  3359 
  3303   map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, false);
  3360   map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, false, save_vectors);
  3304 
  3361 
  3305   // The following is basically a call_VM. However, we need the precise
  3362   // The following is basically a call_VM. However, we need the precise
  3306   // address of the call in order to generate an oopmap. Hence, we do all the
  3363   // address of the call in order to generate an oopmap. Hence, we do all the
  3307   // work ourselves.
  3364   // work ourselves.
  3308 
  3365 
  3310   __ get_thread(java_thread);
  3367   __ get_thread(java_thread);
  3311   __ push(java_thread);
  3368   __ push(java_thread);
  3312   __ set_last_Java_frame(java_thread, noreg, noreg, NULL);
  3369   __ set_last_Java_frame(java_thread, noreg, noreg, NULL);
  3313 
  3370 
  3314   // if this was not a poll_return then we need to correct the return address now.
  3371   // if this was not a poll_return then we need to correct the return address now.
  3315   if( !cause_return ) {
  3372   if (!cause_return) {
  3316     __ movptr(rax, Address(java_thread, JavaThread::saved_exception_pc_offset()));
  3373     __ movptr(rax, Address(java_thread, JavaThread::saved_exception_pc_offset()));
  3317     __ movptr(Address(rbp, wordSize), rax);
  3374     __ movptr(Address(rbp, wordSize), rax);
  3318   }
  3375   }
  3319 
  3376 
  3320   // do the call
  3377   // do the call
  3338 
  3395 
  3339   __ cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
  3396   __ cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
  3340   __ jcc(Assembler::equal, noException);
  3397   __ jcc(Assembler::equal, noException);
  3341 
  3398 
  3342   // Exception pending
  3399   // Exception pending
  3343 
  3400   RegisterSaver::restore_live_registers(masm, save_vectors);
  3344   RegisterSaver::restore_live_registers(masm);
       
  3345 
  3401 
  3346   __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
  3402   __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
  3347 
  3403 
  3348   __ bind(noException);
  3404   __ bind(noException);
  3349 
  3405 
  3350   // Normal exit, register restoring and exit
  3406   // Normal exit, register restoring and exit
  3351   RegisterSaver::restore_live_registers(masm);
  3407   RegisterSaver::restore_live_registers(masm, save_vectors);
  3352 
  3408 
  3353   __ ret(0);
  3409   __ ret(0);
  3354 
  3410 
  3355   // make sure all code is generated
  3411   // make sure all code is generated
  3356   masm->flush();
  3412   masm->flush();