hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp
changeset: 32797:84078d1d9013
parent:    32727:320855c2baef
child:     33160:c59f1676d27e
child:     33105:294e48b4f704
diff from 32796:7117f1bfa7a4 to 32797:84078d1d9013
 };
 
 class RegisterSaver {
   // Capture info about frame layout.  Layout offsets are in jint
   // units because compiler frame slots are jints.
+#define HALF_ZMM_BANK_WORDS 128
 #define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
+#define DEF_ZMM_OFFS(regnum) zmm ## regnum ## _off = zmm_off + (regnum-16)*64/BytesPerInt, zmm ## regnum ## H_off
   enum layout {
     fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area
     xmm_off       = fpu_state_off + 160/BytesPerInt,            // offset in fxsave save area
     DEF_XMM_OFFS(0),
     DEF_XMM_OFFS(1),
[...]
     DEF_XMM_OFFS(11),
     DEF_XMM_OFFS(12),
     DEF_XMM_OFFS(13),
     DEF_XMM_OFFS(14),
     DEF_XMM_OFFS(15),
-    DEF_XMM_OFFS(16),
-    DEF_XMM_OFFS(17),
-    DEF_XMM_OFFS(18),
-    DEF_XMM_OFFS(19),
-    DEF_XMM_OFFS(20),
-    DEF_XMM_OFFS(21),
-    DEF_XMM_OFFS(22),
-    DEF_XMM_OFFS(23),
-    DEF_XMM_OFFS(24),
-    DEF_XMM_OFFS(25),
-    DEF_XMM_OFFS(26),
-    DEF_XMM_OFFS(27),
-    DEF_XMM_OFFS(28),
-    DEF_XMM_OFFS(29),
-    DEF_XMM_OFFS(30),
-    DEF_XMM_OFFS(31),
-    fpu_state_end = fpu_state_off + ((FPUStateSizeInWords - 1)*wordSize / BytesPerInt),
+    zmm_off = fpu_state_off + ((FPUStateSizeInWords - (HALF_ZMM_BANK_WORDS + 1))*wordSize / BytesPerInt),
+    DEF_ZMM_OFFS(16),
+    DEF_ZMM_OFFS(17),
+    DEF_ZMM_OFFS(18),
+    DEF_ZMM_OFFS(19),
+    DEF_ZMM_OFFS(20),
+    DEF_ZMM_OFFS(21),
+    DEF_ZMM_OFFS(22),
+    DEF_ZMM_OFFS(23),
+    DEF_ZMM_OFFS(24),
+    DEF_ZMM_OFFS(25),
+    DEF_ZMM_OFFS(26),
+    DEF_ZMM_OFFS(27),
+    DEF_ZMM_OFFS(28),
+    DEF_ZMM_OFFS(29),
+    DEF_ZMM_OFFS(30),
+    DEF_ZMM_OFFS(31),
+    fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt),
     fpu_stateH_end,
     r15_off, r15H_off,
     r14_off, r14H_off,
     r13_off, r13H_off,
     r12_off, r12H_off,
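For reference, the new DEF_ZMM_OFFS macro spaces the zmmN_off enum entries 64 bytes (16 jint slots) apart, starting at zmm_off near the top of the xsave area. A standalone sketch of that arithmetic, not part of the changeset, assuming BytesPerInt == 4 as on x86_64:

#include <cstdio>

int main() {
  const int BytesPerInt = 4;
  for (int regnum = 16; regnum <= 19; ++regnum) {
    // DEF_ZMM_OFFS(regnum): zmm<regnum>_off = zmm_off + (regnum-16)*64/BytesPerInt
    std::printf("zmm%d_off = zmm_off + %d\n", regnum, (regnum - 16) * 64 / BytesPerInt);
  }
  return 0;
}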
[...]
   static void restore_result_registers(MacroAssembler* masm);
 };
 
 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
   int vect_words = 0;
-  int num_xmm_regs = 16;
-  if (UseAVX > 2) {
-    num_xmm_regs = 32;
+  int off = 0;
+  int num_xmm_regs = XMMRegisterImpl::number_of_registers;
+  if (UseAVX < 3) {
+    num_xmm_regs = num_xmm_regs/2;
   }
 #ifdef COMPILER2
   if (save_vectors) {
     assert(UseAVX > 0, "512bit vectors are supported only with EVEX");
     assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
     // Save upper half of YMM registers
     vect_words = 16 * num_xmm_regs / wordSize;
-    additional_frame_words += vect_words;
-    if (UseAVX > 2) {
-      // Save upper half of ZMM registers as well
+    if (UseAVX < 3) {
       additional_frame_words += vect_words;
     }
   }
 #else
   assert(!save_vectors, "vectors are generated only by C2");
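The extra frame space is now derived from num_xmm_regs instead of being hard-coded. A worked example of the arithmetic in plain C++, not HotSpot code, assuming wordSize == 8 and XMMRegisterImpl::number_of_registers == 32 in an AVX-512-capable build:

#include <cstdio>

int main() {
  const int wordSize = 8;                         // bytes per machine word on x86_64 (assumption)
  int num_xmm_regs = 32;                          // XMMRegisterImpl::number_of_registers (assumption)
  bool evex = false;                              // the UseAVX < 3 case
  if (!evex) {
    num_xmm_regs = num_xmm_regs / 2;              // 16 registers on AVX/AVX2 targets
  }
  int vect_words = 16 * num_xmm_regs / wordSize;  // 16 bytes of YMM upper half per register
  std::printf("vect_words = %d (%d bytes)\n", vect_words, vect_words * wordSize);
  // AVX2 case: 32 words = 256 bytes, matching the old hard-coded subptr(rsp, 256)
  return 0;
}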
[...]
   // to be under the return like a normal enter.
 
   __ enter();          // rsp becomes 16-byte aligned here
   __ push_CPU_state(); // Push a multiple of 16 bytes
 
-  if (vect_words > 0) {
+  // push cpu state handles this on EVEX enabled targets
+  if ((vect_words > 0) && (UseAVX < 3)) {
     assert(vect_words*wordSize >= 256, "");
-    __ subptr(rsp, 256); // Save upper half of YMM registes(0..15)
-    __ vextractf128h(Address(rsp, 0), xmm0);
-    __ vextractf128h(Address(rsp, 16), xmm1);
-    __ vextractf128h(Address(rsp, 32), xmm2);
-    __ vextractf128h(Address(rsp, 48), xmm3);
-    __ vextractf128h(Address(rsp, 64), xmm4);
-    __ vextractf128h(Address(rsp, 80), xmm5);
-    __ vextractf128h(Address(rsp, 96), xmm6);
-    __ vextractf128h(Address(rsp, 112), xmm7);
-    __ vextractf128h(Address(rsp, 128), xmm8);
-    __ vextractf128h(Address(rsp, 144), xmm9);
-    __ vextractf128h(Address(rsp, 160), xmm10);
-    __ vextractf128h(Address(rsp, 176), xmm11);
-    __ vextractf128h(Address(rsp, 192), xmm12);
-    __ vextractf128h(Address(rsp, 208), xmm13);
-    __ vextractf128h(Address(rsp, 224), xmm14);
-    __ vextractf128h(Address(rsp, 240), xmm15);
-    if (UseAVX > 2) {
-      __ subptr(rsp, 256); // Save upper half of YMM registes(16..31)
-      __ vextractf128h(Address(rsp, 0), xmm16);
-      __ vextractf128h(Address(rsp, 16), xmm17);
-      __ vextractf128h(Address(rsp, 32), xmm18);
-      __ vextractf128h(Address(rsp, 48), xmm19);
-      __ vextractf128h(Address(rsp, 64), xmm20);
-      __ vextractf128h(Address(rsp, 80), xmm21);
-      __ vextractf128h(Address(rsp, 96), xmm22);
-      __ vextractf128h(Address(rsp, 112), xmm23);
-      __ vextractf128h(Address(rsp, 128), xmm24);
-      __ vextractf128h(Address(rsp, 144), xmm25);
-      __ vextractf128h(Address(rsp, 160), xmm26);
-      __ vextractf128h(Address(rsp, 176), xmm27);
-      __ vextractf128h(Address(rsp, 192), xmm28);
-      __ vextractf128h(Address(rsp, 208), xmm29);
-      __ vextractf128h(Address(rsp, 224), xmm30);
-      __ vextractf128h(Address(rsp, 240), xmm31);
-      // Now handle the ZMM registers (0..31)
-      __ subptr(rsp, 1024); // Save upper half of ZMM registes
-      __ vextractf64x4h(Address(rsp, 0), xmm0);
-      __ vextractf64x4h(Address(rsp, 32), xmm1);
-      __ vextractf64x4h(Address(rsp, 64), xmm2);
-      __ vextractf64x4h(Address(rsp, 96), xmm3);
-      __ vextractf64x4h(Address(rsp, 128), xmm4);
-      __ vextractf64x4h(Address(rsp, 160), xmm5);
-      __ vextractf64x4h(Address(rsp, 192), xmm6);
-      __ vextractf64x4h(Address(rsp, 224), xmm7);
-      __ vextractf64x4h(Address(rsp, 256), xmm8);
-      __ vextractf64x4h(Address(rsp, 288), xmm9);
-      __ vextractf64x4h(Address(rsp, 320), xmm10);
-      __ vextractf64x4h(Address(rsp, 352), xmm11);
-      __ vextractf64x4h(Address(rsp, 384), xmm12);
-      __ vextractf64x4h(Address(rsp, 416), xmm13);
-      __ vextractf64x4h(Address(rsp, 448), xmm14);
-      __ vextractf64x4h(Address(rsp, 480), xmm15);
-      __ vextractf64x4h(Address(rsp, 512), xmm16);
-      __ vextractf64x4h(Address(rsp, 544), xmm17);
-      __ vextractf64x4h(Address(rsp, 576), xmm18);
-      __ vextractf64x4h(Address(rsp, 608), xmm19);
-      __ vextractf64x4h(Address(rsp, 640), xmm20);
-      __ vextractf64x4h(Address(rsp, 672), xmm21);
-      __ vextractf64x4h(Address(rsp, 704), xmm22);
-      __ vextractf64x4h(Address(rsp, 736), xmm23);
-      __ vextractf64x4h(Address(rsp, 768), xmm24);
-      __ vextractf64x4h(Address(rsp, 800), xmm25);
-      __ vextractf64x4h(Address(rsp, 832), xmm26);
-      __ vextractf64x4h(Address(rsp, 864), xmm27);
-      __ vextractf64x4h(Address(rsp, 896), xmm28);
-      __ vextractf64x4h(Address(rsp, 928), xmm29);
-      __ vextractf64x4h(Address(rsp, 960), xmm30);
-      __ vextractf64x4h(Address(rsp, 992), xmm31);
-    }
+    // Save upper half of YMM registes(0..num_xmm_regs)
+    __ subptr(rsp, num_xmm_regs*16);
+    for (int n = 0; n < num_xmm_regs; n++) {
+      __ vextractf128h(Address(rsp, off++*16), as_XMMRegister(n));
     }
   }
   if (frame::arg_reg_save_area_bytes != 0) {
     // Allocate argument register save area
     __ subptr(rsp, frame::arg_reg_save_area_bytes);
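The new loop writes the same slots the removed unrolled vextractf128h sequence used. A small standalone check of the addressing, in plain C++ rather than HotSpot code:

#include <cstdio>

int main() {
  const int num_xmm_regs = 16;  // AVX/AVX2 case after the halving above
  int off = 0;
  for (int n = 0; n < num_xmm_regs; n++) {
    // mirrors __ vextractf128h(Address(rsp, off++*16), as_XMMRegister(n))
    std::printf("xmm%-2d -> Address(rsp, %d)\n", n, off++ * 16);
  }
  // prints rsp+0, rsp+16, ..., rsp+240: the offsets the removed code spelled out by hand
  return 0;
}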
[...]
   map->set_callee_saved(STACK_OFFSET( r11_off ), r11->as_VMReg());
   map->set_callee_saved(STACK_OFFSET( r12_off ), r12->as_VMReg());
   map->set_callee_saved(STACK_OFFSET( r13_off ), r13->as_VMReg());
   map->set_callee_saved(STACK_OFFSET( r14_off ), r14->as_VMReg());
   map->set_callee_saved(STACK_OFFSET( r15_off ), r15->as_VMReg());
-  map->set_callee_saved(STACK_OFFSET(xmm0_off ), xmm0->as_VMReg());
-  map->set_callee_saved(STACK_OFFSET(xmm1_off ), xmm1->as_VMReg());
-  map->set_callee_saved(STACK_OFFSET(xmm2_off ), xmm2->as_VMReg());
-  map->set_callee_saved(STACK_OFFSET(xmm3_off ), xmm3->as_VMReg());
-  map->set_callee_saved(STACK_OFFSET(xmm4_off ), xmm4->as_VMReg());
-  map->set_callee_saved(STACK_OFFSET(xmm5_off ), xmm5->as_VMReg());
-  map->set_callee_saved(STACK_OFFSET(xmm6_off ), xmm6->as_VMReg());
-  map->set_callee_saved(STACK_OFFSET(xmm7_off ), xmm7->as_VMReg());
-  map->set_callee_saved(STACK_OFFSET(xmm8_off ), xmm8->as_VMReg());
-  map->set_callee_saved(STACK_OFFSET(xmm9_off ), xmm9->as_VMReg());
-  map->set_callee_saved(STACK_OFFSET(xmm10_off), xmm10->as_VMReg());
-  map->set_callee_saved(STACK_OFFSET(xmm11_off), xmm11->as_VMReg());
-  map->set_callee_saved(STACK_OFFSET(xmm12_off), xmm12->as_VMReg());
-  map->set_callee_saved(STACK_OFFSET(xmm13_off), xmm13->as_VMReg());
-  map->set_callee_saved(STACK_OFFSET(xmm14_off), xmm14->as_VMReg());
-  map->set_callee_saved(STACK_OFFSET(xmm15_off), xmm15->as_VMReg());
-  if (UseAVX > 2) {
-    map->set_callee_saved(STACK_OFFSET(xmm16_off), xmm16->as_VMReg());
-    map->set_callee_saved(STACK_OFFSET(xmm17_off), xmm17->as_VMReg());
-    map->set_callee_saved(STACK_OFFSET(xmm18_off), xmm18->as_VMReg());
-    map->set_callee_saved(STACK_OFFSET(xmm19_off), xmm19->as_VMReg());
-    map->set_callee_saved(STACK_OFFSET(xmm20_off), xmm20->as_VMReg());
-    map->set_callee_saved(STACK_OFFSET(xmm21_off), xmm21->as_VMReg());
-    map->set_callee_saved(STACK_OFFSET(xmm22_off), xmm22->as_VMReg());
-    map->set_callee_saved(STACK_OFFSET(xmm23_off), xmm23->as_VMReg());
-    map->set_callee_saved(STACK_OFFSET(xmm24_off), xmm24->as_VMReg());
-    map->set_callee_saved(STACK_OFFSET(xmm25_off), xmm25->as_VMReg());
-    map->set_callee_saved(STACK_OFFSET(xmm26_off), xmm26->as_VMReg());
-    map->set_callee_saved(STACK_OFFSET(xmm27_off), xmm27->as_VMReg());
-    map->set_callee_saved(STACK_OFFSET(xmm28_off), xmm28->as_VMReg());
-    map->set_callee_saved(STACK_OFFSET(xmm29_off), xmm29->as_VMReg());
-    map->set_callee_saved(STACK_OFFSET(xmm30_off), xmm30->as_VMReg());
-    map->set_callee_saved(STACK_OFFSET(xmm31_off), xmm31->as_VMReg());
+  // For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15,
+  // on EVEX enabled targets, we get it included in the xsave area
+  off = xmm0_off;
+  int delta = xmm1_off - off;
+  for (int n = 0; n < 16; n++) {
+    XMMRegister xmm_name = as_XMMRegister(n);
+    map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg());
+    off += delta;
+  }
+  if(UseAVX > 2) {
+    // Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets
+    off = zmm16_off;
+    delta = zmm17_off - off;
+    for (int n = 16; n < num_xmm_regs; n++) {
+      XMMRegister xmm_name = as_XMMRegister(n);
+      map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg());
+      off += delta;
+    }
   }
 
   // %%% These should all be a waste but we'll keep things as they were for now
   if (true) {
     map->set_callee_saved(STACK_OFFSET( raxH_off ), rax->as_VMReg()->next());
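The off/delta walk depends on the layout enum placing consecutive xmmN_off entries a fixed stride apart: each DEF_XMM_OFFS covers 16 bytes, i.e. 16/BytesPerInt == 4 jint slots. A sketch with a hypothetical base value, in plain C++ rather than HotSpot code:

#include <cstdio>

int main() {
  const int xmm0_off = 40;             // hypothetical placeholder for the real enum value
  const int xmm1_off = xmm0_off + 4;   // DEF_XMM_OFFS stride: 16/BytesPerInt
  int off = xmm0_off;
  int delta = xmm1_off - off;          // == 4
  for (int n = 0; n < 16; n++) {
    // visits the same slots the removed per-register set_callee_saved calls named explicitly
    std::printf("xmm%-2d -> STACK_OFFSET(%d)\n", n, off);
    off += delta;
  }
  return 0;
}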
[...]
     map->set_callee_saved(STACK_OFFSET( r11H_off ), r11->as_VMReg()->next());
     map->set_callee_saved(STACK_OFFSET( r12H_off ), r12->as_VMReg()->next());
     map->set_callee_saved(STACK_OFFSET( r13H_off ), r13->as_VMReg()->next());
     map->set_callee_saved(STACK_OFFSET( r14H_off ), r14->as_VMReg()->next());
     map->set_callee_saved(STACK_OFFSET( r15H_off ), r15->as_VMReg()->next());
-    map->set_callee_saved(STACK_OFFSET(xmm0H_off ), xmm0->as_VMReg()->next());
-    map->set_callee_saved(STACK_OFFSET(xmm1H_off ), xmm1->as_VMReg()->next());
-    map->set_callee_saved(STACK_OFFSET(xmm2H_off ), xmm2->as_VMReg()->next());
-    map->set_callee_saved(STACK_OFFSET(xmm3H_off ), xmm3->as_VMReg()->next());
-    map->set_callee_saved(STACK_OFFSET(xmm4H_off ), xmm4->as_VMReg()->next());
-    map->set_callee_saved(STACK_OFFSET(xmm5H_off ), xmm5->as_VMReg()->next());
-    map->set_callee_saved(STACK_OFFSET(xmm6H_off ), xmm6->as_VMReg()->next());
-    map->set_callee_saved(STACK_OFFSET(xmm7H_off ), xmm7->as_VMReg()->next());
-    map->set_callee_saved(STACK_OFFSET(xmm8H_off ), xmm8->as_VMReg()->next());
-    map->set_callee_saved(STACK_OFFSET(xmm9H_off ), xmm9->as_VMReg()->next());
-    map->set_callee_saved(STACK_OFFSET(xmm10H_off), xmm10->as_VMReg()->next());
-    map->set_callee_saved(STACK_OFFSET(xmm11H_off), xmm11->as_VMReg()->next());
-    map->set_callee_saved(STACK_OFFSET(xmm12H_off), xmm12->as_VMReg()->next());
-    map->set_callee_saved(STACK_OFFSET(xmm13H_off), xmm13->as_VMReg()->next());
-    map->set_callee_saved(STACK_OFFSET(xmm14H_off), xmm14->as_VMReg()->next());
-    map->set_callee_saved(STACK_OFFSET(xmm15H_off), xmm15->as_VMReg()->next());
+    // For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15,
+    // on EVEX enabled targets, we get it included in the xsave area
+    off = xmm0H_off;
+    delta = xmm1H_off - off;
+    for (int n = 0; n < 16; n++) {
+      XMMRegister xmm_name = as_XMMRegister(n);
+      map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()->next());
+      off += delta;
+    }
     if (UseAVX > 2) {
-      map->set_callee_saved(STACK_OFFSET(xmm16H_off), xmm16->as_VMReg()->next());
-      map->set_callee_saved(STACK_OFFSET(xmm17H_off), xmm17->as_VMReg()->next());
-      map->set_callee_saved(STACK_OFFSET(xmm18H_off), xmm18->as_VMReg()->next());
-      map->set_callee_saved(STACK_OFFSET(xmm19H_off), xmm19->as_VMReg()->next());
-      map->set_callee_saved(STACK_OFFSET(xmm20H_off), xmm20->as_VMReg()->next());
-      map->set_callee_saved(STACK_OFFSET(xmm21H_off), xmm21->as_VMReg()->next());
-      map->set_callee_saved(STACK_OFFSET(xmm22H_off), xmm22->as_VMReg()->next());
-      map->set_callee_saved(STACK_OFFSET(xmm23H_off), xmm23->as_VMReg()->next());
-      map->set_callee_saved(STACK_OFFSET(xmm24H_off), xmm24->as_VMReg()->next());
-      map->set_callee_saved(STACK_OFFSET(xmm25H_off), xmm25->as_VMReg()->next());
-      map->set_callee_saved(STACK_OFFSET(xmm26H_off), xmm26->as_VMReg()->next());
-      map->set_callee_saved(STACK_OFFSET(xmm27H_off), xmm27->as_VMReg()->next());
-      map->set_callee_saved(STACK_OFFSET(xmm28H_off), xmm28->as_VMReg()->next());
-      map->set_callee_saved(STACK_OFFSET(xmm29H_off), xmm29->as_VMReg()->next());
-      map->set_callee_saved(STACK_OFFSET(xmm30H_off), xmm30->as_VMReg()->next());
-      map->set_callee_saved(STACK_OFFSET(xmm31H_off), xmm31->as_VMReg()->next());
+      // Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets
+      off = zmm16H_off;
+      delta = zmm17H_off - off;
+      for (int n = 16; n < num_xmm_regs; n++) {
+        XMMRegister xmm_name = as_XMMRegister(n);
+        map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()->next());
+        off += delta;
+      }
     }
   }
 
   return map;
 }
 
 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
+  int num_xmm_regs = XMMRegisterImpl::number_of_registers;
+  if (UseAVX < 3) {
+    num_xmm_regs = num_xmm_regs/2;
+  }
   if (frame::arg_reg_save_area_bytes != 0) {
     // Pop arg register save area
     __ addptr(rsp, frame::arg_reg_save_area_bytes);
   }
 #ifdef COMPILER2
-  if (restore_vectors) {
-    // Restore upper half of YMM registes (0..15)
-    assert(UseAVX > 0, "512bit vectors are supported only with AVX");
-    assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
-    __ vinsertf128h(xmm0, Address(rsp,  0));
-    __ vinsertf128h(xmm1, Address(rsp, 16));
-    __ vinsertf128h(xmm2, Address(rsp, 32));
-    __ vinsertf128h(xmm3, Address(rsp, 48));
-    __ vinsertf128h(xmm4, Address(rsp, 64));
-    __ vinsertf128h(xmm5, Address(rsp, 80));
-    __ vinsertf128h(xmm6, Address(rsp, 96));
-    __ vinsertf128h(xmm7, Address(rsp,112));
-    __ vinsertf128h(xmm8, Address(rsp,128));
-    __ vinsertf128h(xmm9, Address(rsp,144));
-    __ vinsertf128h(xmm10, Address(rsp,160));
-    __ vinsertf128h(xmm11, Address(rsp,176));
-    __ vinsertf128h(xmm12, Address(rsp,192));
-    __ vinsertf128h(xmm13, Address(rsp,208));
-    __ vinsertf128h(xmm14, Address(rsp,224));
-    __ vinsertf128h(xmm15, Address(rsp,240));
-    __ addptr(rsp, 256);
-    if (UseAVX > 2) {
-      // Restore upper half of YMM registes (16..31)
-      __ vinsertf128h(xmm16, Address(rsp,  0));
-      __ vinsertf128h(xmm17, Address(rsp, 16));
-      __ vinsertf128h(xmm18, Address(rsp, 32));
-      __ vinsertf128h(xmm19, Address(rsp, 48));
-      __ vinsertf128h(xmm20, Address(rsp, 64));
-      __ vinsertf128h(xmm21, Address(rsp, 80));
-      __ vinsertf128h(xmm22, Address(rsp, 96));
-      __ vinsertf128h(xmm23, Address(rsp,112));
-      __ vinsertf128h(xmm24, Address(rsp,128));
-      __ vinsertf128h(xmm25, Address(rsp,144));
-      __ vinsertf128h(xmm26, Address(rsp,160));
-      __ vinsertf128h(xmm27, Address(rsp,176));
-      __ vinsertf128h(xmm28, Address(rsp,192));
-      __ vinsertf128h(xmm29, Address(rsp,208));
-      __ vinsertf128h(xmm30, Address(rsp,224));
-      __ vinsertf128h(xmm31, Address(rsp,240));
-      __ addptr(rsp, 256);
-      // Restore upper half of ZMM registes.
-      __ vinsertf64x4h(xmm0, Address(rsp, 0));
-      __ vinsertf64x4h(xmm1, Address(rsp, 32));
-      __ vinsertf64x4h(xmm2, Address(rsp, 64));
-      __ vinsertf64x4h(xmm3, Address(rsp, 96));
-      __ vinsertf64x4h(xmm4, Address(rsp, 128));
-      __ vinsertf64x4h(xmm5, Address(rsp, 160));
-      __ vinsertf64x4h(xmm6, Address(rsp, 192));
-      __ vinsertf64x4h(xmm7, Address(rsp, 224));
-      __ vinsertf64x4h(xmm8, Address(rsp, 256));
-      __ vinsertf64x4h(xmm9, Address(rsp, 288));
-      __ vinsertf64x4h(xmm10, Address(rsp, 320));
-      __ vinsertf64x4h(xmm11, Address(rsp, 352));
-      __ vinsertf64x4h(xmm12, Address(rsp, 384));
-      __ vinsertf64x4h(xmm13, Address(rsp, 416));
-      __ vinsertf64x4h(xmm14, Address(rsp, 448));
-      __ vinsertf64x4h(xmm15, Address(rsp, 480));
-      __ vinsertf64x4h(xmm16, Address(rsp, 512));
-      __ vinsertf64x4h(xmm17, Address(rsp, 544));
-      __ vinsertf64x4h(xmm18, Address(rsp, 576));
-      __ vinsertf64x4h(xmm19, Address(rsp, 608));
-      __ vinsertf64x4h(xmm20, Address(rsp, 640));
-      __ vinsertf64x4h(xmm21, Address(rsp, 672));
-      __ vinsertf64x4h(xmm22, Address(rsp, 704));
-      __ vinsertf64x4h(xmm23, Address(rsp, 736));
-      __ vinsertf64x4h(xmm24, Address(rsp, 768));
-      __ vinsertf64x4h(xmm25, Address(rsp, 800));
-      __ vinsertf64x4h(xmm26, Address(rsp, 832));
-      __ vinsertf64x4h(xmm27, Address(rsp, 864));
-      __ vinsertf64x4h(xmm28, Address(rsp, 896));
-      __ vinsertf64x4h(xmm29, Address(rsp, 928));
-      __ vinsertf64x4h(xmm30, Address(rsp, 960));
-      __ vinsertf64x4h(xmm31, Address(rsp, 992));
-      __ addptr(rsp, 1024);
-    }
+  // On EVEX enabled targets everything is handled in pop fpu state
+  if ((restore_vectors) && (UseAVX < 3)) {
+    assert(UseAVX > 0, "256/512-bit vectors are supported only with AVX");
+    assert(MaxVectorSize == 64, "up to 512bit vectors are supported now");
+    int off = 0;
+    // Restore upper half of YMM registes (0..num_xmm_regs)
+    for (int n = 0; n < num_xmm_regs; n++) {
+      __ vinsertf128h(as_XMMRegister(n), Address(rsp,  off++*16));
+    }
+    __ addptr(rsp, num_xmm_regs*16);
   }
 #else
   assert(!restore_vectors, "vectors are generated only by C2");
 #endif
   // Recover CPU state
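Taken together, the new guards mean the explicit save/restore loops only run on pre-EVEX targets; with UseAVX > 2 the upper register state travels through push_CPU_state() and its matching pop in the epilogue (the "pop fpu state" the new comment refers to). A sketch of that decision in plain C++, not HotSpot code; UseAVX == 0 with save_vectors is already excluded by the asserts above:

#include <cstdio>

int main() {
  const bool save_vectors = true;  // C2-only case, per the #ifdef COMPILER2 guards
  for (int use_avx = 1; use_avx <= 3; ++use_avx) {
    bool explicit_loop = save_vectors && (use_avx < 3);
    std::printf("UseAVX=%d -> %s\n", use_avx,
                explicit_loop ? "explicit YMM upper-half loop (vextractf128h/vinsertf128h)"
                              : "covered by the pushed/popped CPU state (xsave area)");
  }
  return 0;
}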