hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp
changeset 41333 ce08d64b41c7
parent 40644 39e631ed7145
child 42039 db627462f2c9
equal deleted inserted replaced
41332:a1ff82a6136a 41333:ce08d64b41c7
  3234     }
  3234     }
  3235 
  3235 
  3236 #ifdef _WIN64
  3236 #ifdef _WIN64
  3237     // on win64, fill len_reg from stack position
  3237     // on win64, fill len_reg from stack position
  3238     __ movl(len_reg, len_mem);
  3238     __ movl(len_reg, len_mem);
  3239     // save the xmm registers which must be preserved 6-15
       
  3240     __ subptr(rsp, -rsp_after_call_off * wordSize);
       
  3241     for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
       
  3242       __ movdqu(xmm_save(i), as_XMMRegister(i));
       
  3243     }
       
  3244 #else
  3239 #else
  3245     __ push(len_reg); // Save
  3240     __ push(len_reg); // Save
  3246 #endif
  3241 #endif
  3247 
  3242 
  3248     const XMMRegister xmm_key_shuf_mask = xmm_temp;  // used temporarily to swap key bytes up front
  3243     const XMMRegister xmm_key_shuf_mask = xmm_temp;  // used temporarily to swap key bytes up front
  3279 
  3274 
  3280     __ BIND(L_exit);
  3275     __ BIND(L_exit);
  3281     __ movdqu(Address(rvec, 0), xmm_result);     // final value of r stored in rvec of CipherBlockChaining object
  3276     __ movdqu(Address(rvec, 0), xmm_result);     // final value of r stored in rvec of CipherBlockChaining object
  3282 
  3277 
  3283 #ifdef _WIN64
  3278 #ifdef _WIN64
  3284     // restore xmm regs belonging to calling function
       
  3285     for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
       
  3286       __ movdqu(as_XMMRegister(i), xmm_save(i));
       
  3287     }
       
  3288     __ movl(rax, len_mem);
  3279     __ movl(rax, len_mem);
  3289 #else
  3280 #else
  3290     __ pop(rax); // return length
  3281     __ pop(rax); // return length
  3291 #endif
  3282 #endif
  3292     __ leave(); // required for proper stackwalking of RuntimeStub frame
  3283     __ leave(); // required for proper stackwalking of RuntimeStub frame
  3444     }
  3435     }
  3445 
  3436 
  3446 #ifdef _WIN64
  3437 #ifdef _WIN64
  3447     // on win64, fill len_reg from stack position
  3438     // on win64, fill len_reg from stack position
  3448     __ movl(len_reg, len_mem);
  3439     __ movl(len_reg, len_mem);
  3449     // save the xmm registers which must be preserved 6-15
       
  3450     __ subptr(rsp, -rsp_after_call_off * wordSize);
       
  3451     for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
       
  3452       __ movdqu(xmm_save(i), as_XMMRegister(i));
       
  3453     }
       
  3454 #else
  3440 #else
  3455     __ push(len_reg); // Save
  3441     __ push(len_reg); // Save
  3456 #endif
  3442 #endif
  3457     __ push(rbx);
  3443     __ push(rbx);
  3458     // the java expanded key ordering is rotated one position from what we want
  3444     // the java expanded key ordering is rotated one position from what we want
  3642 
  3628 
  3643     __ BIND(L_exit);
  3629     __ BIND(L_exit);
  3644     __ movdqu(Address(rvec, 0), xmm_prev_block_cipher);     // final value of r stored in rvec of CipherBlockChaining object
  3630     __ movdqu(Address(rvec, 0), xmm_prev_block_cipher);     // final value of r stored in rvec of CipherBlockChaining object
  3645     __ pop(rbx);
  3631     __ pop(rbx);
  3646 #ifdef _WIN64
  3632 #ifdef _WIN64
  3647     // restore regs belonging to calling function
       
  3648     for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
       
  3649       __ movdqu(as_XMMRegister(i), xmm_save(i));
       
  3650     }
       
  3651     __ movl(rax, len_mem);
  3633     __ movl(rax, len_mem);
  3652 #else
  3634 #else
  3653     __ pop(rax); // return length
  3635     __ pop(rax); // return length
  3654 #endif
  3636 #endif
  3655     __ leave(); // required for proper stackwalking of RuntimeStub frame
  3637     __ leave(); // required for proper stackwalking of RuntimeStub frame
  3697     const XMMRegister msg3 = xmm6;
  3679     const XMMRegister msg3 = xmm6;
  3698     const XMMRegister shuf_mask = xmm7;
  3680     const XMMRegister shuf_mask = xmm7;
  3699 
  3681 
  3700     __ enter();
  3682     __ enter();
  3701 
  3683 
  3702 #ifdef _WIN64
       
  3703     // save the xmm registers which must be preserved 6-7
       
  3704     __ subptr(rsp, 4 * wordSize);
       
  3705     __ movdqu(Address(rsp, 0), xmm6);
       
  3706     __ movdqu(Address(rsp, 2 * wordSize), xmm7);
       
  3707 #endif
       
  3708 
       
  3709     __ subptr(rsp, 4 * wordSize);
  3684     __ subptr(rsp, 4 * wordSize);
  3710 
  3685 
  3711     __ fast_sha1(abcd, e0, e1, msg0, msg1, msg2, msg3, shuf_mask,
  3686     __ fast_sha1(abcd, e0, e1, msg0, msg1, msg2, msg3, shuf_mask,
  3712       buf, state, ofs, limit, rsp, multi_block);
  3687       buf, state, ofs, limit, rsp, multi_block);
  3713 
  3688 
  3714     __ addptr(rsp, 4 * wordSize);
  3689     __ addptr(rsp, 4 * wordSize);
  3715 #ifdef _WIN64
       
  3716     // restore xmm regs belonging to calling function
       
  3717     __ movdqu(xmm6, Address(rsp, 0));
       
  3718     __ movdqu(xmm7, Address(rsp, 2 * wordSize));
       
  3719     __ addptr(rsp, 4 * wordSize);
       
  3720 #endif
       
  3721 
  3690 
  3722     __ leave();
  3691     __ leave();
  3723     __ ret(0);
  3692     __ ret(0);
  3724     return start;
  3693     return start;
  3725   }
  3694   }
  3773     const XMMRegister msgtmp4 = xmm7;
  3742     const XMMRegister msgtmp4 = xmm7;
  3774 
  3743 
  3775     const XMMRegister shuf_mask = xmm8;
  3744     const XMMRegister shuf_mask = xmm8;
  3776 
  3745 
  3777     __ enter();
  3746     __ enter();
  3778 #ifdef _WIN64
       
  3779     // save the xmm registers which must be preserved 6-7
       
  3780     __ subptr(rsp, 6 * wordSize);
       
  3781     __ movdqu(Address(rsp, 0), xmm6);
       
  3782     __ movdqu(Address(rsp, 2 * wordSize), xmm7);
       
  3783     __ movdqu(Address(rsp, 4 * wordSize), xmm8);
       
  3784 
       
  3785     if (!VM_Version::supports_sha() && VM_Version::supports_avx2()) {
       
  3786       __ subptr(rsp, 10 * wordSize);
       
  3787       __ movdqu(Address(rsp, 0), xmm9);
       
  3788       __ movdqu(Address(rsp, 2 * wordSize), xmm10);
       
  3789       __ movdqu(Address(rsp, 4 * wordSize), xmm11);
       
  3790       __ movdqu(Address(rsp, 6 * wordSize), xmm12);
       
  3791       __ movdqu(Address(rsp, 8 * wordSize), xmm13);
       
  3792     }
       
  3793 #endif
       
  3794 
  3747 
  3795     __ subptr(rsp, 4 * wordSize);
  3748     __ subptr(rsp, 4 * wordSize);
  3796 
  3749 
  3797     if (VM_Version::supports_sha()) {
  3750     if (VM_Version::supports_sha()) {
  3798       __ fast_sha256(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4,
  3751       __ fast_sha256(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4,
  3800     } else if (VM_Version::supports_avx2()) {
  3753     } else if (VM_Version::supports_avx2()) {
  3801       __ sha256_AVX2(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4,
  3754       __ sha256_AVX2(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4,
  3802         buf, state, ofs, limit, rsp, multi_block, shuf_mask);
  3755         buf, state, ofs, limit, rsp, multi_block, shuf_mask);
  3803     }
  3756     }
  3804     __ addptr(rsp, 4 * wordSize);
  3757     __ addptr(rsp, 4 * wordSize);
  3805 #ifdef _WIN64
  3758 
  3806     // restore xmm regs belonging to calling function
       
  3807     if (!VM_Version::supports_sha() && VM_Version::supports_avx2()) {
       
  3808       __ movdqu(xmm9, Address(rsp, 0));
       
  3809       __ movdqu(xmm10, Address(rsp, 2 * wordSize));
       
  3810       __ movdqu(xmm11, Address(rsp, 4 * wordSize));
       
  3811       __ movdqu(xmm12, Address(rsp, 6 * wordSize));
       
  3812       __ movdqu(xmm13, Address(rsp, 8 * wordSize));
       
  3813       __ addptr(rsp, 10 * wordSize);
       
  3814     }
       
  3815     __ movdqu(xmm6, Address(rsp, 0));
       
  3816     __ movdqu(xmm7, Address(rsp, 2 * wordSize));
       
  3817     __ movdqu(xmm8, Address(rsp, 4 * wordSize));
       
  3818     __ addptr(rsp, 6 * wordSize);
       
  3819 #endif
       
  3820     __ leave();
  3759     __ leave();
  3821     __ ret(0);
  3760     __ ret(0);
  3822     return start;
  3761     return start;
  3823   }
  3762   }
  3824 
  3763 
  3915         __ movl(rax, 0xffff);
  3854         __ movl(rax, 0xffff);
  3916         __ kmovql(k1, rax);
  3855         __ kmovql(k1, rax);
  3917     }
  3856     }
  3918 
  3857 
  3919 #ifdef _WIN64
  3858 #ifdef _WIN64
  3920     // save the xmm registers which must be preserved 6-14
  3859     // allocate spill slots for r13, r14
  3921     const int XMM_REG_NUM_KEY_LAST = 14;
  3860     enum {
  3922     __ subptr(rsp, -rsp_after_call_off * wordSize);
  3861         saved_r13_offset,
  3923     for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
  3862         saved_r14_offset
  3924       __ movdqu(xmm_save(i), as_XMMRegister(i));
  3863     };
  3925     }
  3864     __ subptr(rsp, 2 * wordSize);
  3926 
  3865     __ movptr(Address(rsp, saved_r13_offset * wordSize), r13);
  3927     const Address r13_save(rbp, rdi_off * wordSize);
  3866     __ movptr(Address(rsp, saved_r14_offset * wordSize), r14);
  3928     const Address r14_save(rbp, rsi_off * wordSize);
       
  3929 
       
  3930     __ movptr(r13_save, r13);
       
  3931     __ movptr(r14_save, r14);
       
  3932 
  3867 
  3933     // on win64, fill len_reg from stack position
  3868     // on win64, fill len_reg from stack position
  3934     __ movl(len_reg, len_mem);
  3869     __ movl(len_reg, len_mem);
  3935     __ movptr(saved_encCounter_start, saved_encCounter_mem);
  3870     __ movptr(saved_encCounter_start, saved_encCounter_mem);
  3936     __ movptr(used_addr, used_mem);
  3871     __ movptr(used_addr, used_mem);
  4128     __ BIND(L_exit);
  4063     __ BIND(L_exit);
  4129     __ pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled back.
  4064     __ pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled back.
  4130     __ movdqu(Address(counter, 0), xmm_curr_counter); //save counter back
  4065     __ movdqu(Address(counter, 0), xmm_curr_counter); //save counter back
  4131     __ pop(rbx); // pop the saved RBX.
  4066     __ pop(rbx); // pop the saved RBX.
  4132 #ifdef _WIN64
  4067 #ifdef _WIN64
  4133     // restore regs belonging to calling function
       
  4134     for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
       
  4135       __ movdqu(as_XMMRegister(i), xmm_save(i));
       
  4136     }
       
  4137     __ movl(rax, len_mem);
  4068     __ movl(rax, len_mem);
  4138     __ movptr(r13, r13_save);
  4069     __ movptr(r13, Address(rsp, saved_r13_offset * wordSize));
  4139     __ movptr(r14, r14_save);
  4070     __ movptr(r14, Address(rsp, saved_r14_offset * wordSize));
       
  4071     __ addptr(rsp, 2 * wordSize);
  4140 #else
  4072 #else
  4141     __ pop(rax); // return 'len'
  4073     __ pop(rax); // return 'len'
  4142 #endif
  4074 #endif
  4143     __ leave(); // required for proper stackwalking of RuntimeStub frame
  4075     __ leave(); // required for proper stackwalking of RuntimeStub frame
  4144     __ ret(0);
  4076     __ ret(0);
  4174 
  4106 
  4175     const Register state        = c_rarg0;
  4107     const Register state        = c_rarg0;
  4176     const Register subkeyH      = c_rarg1;
  4108     const Register subkeyH      = c_rarg1;
  4177     const Register data         = c_rarg2;
  4109     const Register data         = c_rarg2;
  4178     const Register blocks       = c_rarg3;
  4110     const Register blocks       = c_rarg3;
  4179 
       
  4180 #ifdef _WIN64
       
  4181     const int XMM_REG_LAST  = 10;
       
  4182 #endif
       
  4183 
  4111 
  4184     const XMMRegister xmm_temp0 = xmm0;
  4112     const XMMRegister xmm_temp0 = xmm0;
  4185     const XMMRegister xmm_temp1 = xmm1;
  4113     const XMMRegister xmm_temp1 = xmm1;
  4186     const XMMRegister xmm_temp2 = xmm2;
  4114     const XMMRegister xmm_temp2 = xmm2;
  4187     const XMMRegister xmm_temp3 = xmm3;
  4115     const XMMRegister xmm_temp3 = xmm3;
  4200     // On EVEX without VL and BW, these instructions will all be AVX.
  4128     // On EVEX without VL and BW, these instructions will all be AVX.
  4201     if (VM_Version::supports_avx512vlbw()) {
  4129     if (VM_Version::supports_avx512vlbw()) {
  4202       __ movl(rax, 0xffff);
  4130       __ movl(rax, 0xffff);
  4203       __ kmovql(k1, rax);
  4131       __ kmovql(k1, rax);
  4204     }
  4132     }
  4205 
       
  4206 #ifdef _WIN64
       
  4207     // save the xmm registers which must be preserved 6-10
       
  4208     __ subptr(rsp, -rsp_after_call_off * wordSize);
       
  4209     for (int i = 6; i <= XMM_REG_LAST; i++) {
       
  4210       __ movdqu(xmm_save(i), as_XMMRegister(i));
       
  4211     }
       
  4212 #endif
       
  4213 
  4133 
  4214     __ movdqu(xmm_temp10, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
  4134     __ movdqu(xmm_temp10, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
  4215 
  4135 
  4216     __ movdqu(xmm_temp0, Address(state, 0));
  4136     __ movdqu(xmm_temp0, Address(state, 0));
  4217     __ pshufb(xmm_temp0, xmm_temp10);
  4137     __ pshufb(xmm_temp0, xmm_temp10);
  4308 
  4228 
  4309     __ BIND(L_exit);
  4229     __ BIND(L_exit);
  4310     __ pshufb(xmm_temp6, xmm_temp10);          // Byte swap 16-byte result
  4230     __ pshufb(xmm_temp6, xmm_temp10);          // Byte swap 16-byte result
  4311     __ movdqu(Address(state, 0), xmm_temp6);   // store the result
  4231     __ movdqu(Address(state, 0), xmm_temp6);   // store the result
  4312 
  4232 
  4313 #ifdef _WIN64
       
  4314     // restore xmm regs belonging to calling function
       
  4315     for (int i = 6; i <= XMM_REG_LAST; i++) {
       
  4316       __ movdqu(as_XMMRegister(i), xmm_save(i));
       
  4317     }
       
  4318 #endif
       
  4319     __ leave();
  4233     __ leave();
  4320     __ ret(0);
  4234     __ ret(0);
  4321     return start;
  4235     return start;
  4322   }
  4236   }
  4323 
  4237 
  4650     const Register tmp   = r11;
  4564     const Register tmp   = r11;
  4651 
  4565 
  4652     BLOCK_COMMENT("Entry:");
  4566     BLOCK_COMMENT("Entry:");
  4653     __ enter(); // required for proper stackwalking of RuntimeStub frame
  4567     __ enter(); // required for proper stackwalking of RuntimeStub frame
  4654 
  4568 
  4655 #ifdef _WIN64
       
  4656     // save the xmm registers which must be preserved 6-7
       
  4657     __ subptr(rsp, 4 * wordSize);
       
  4658     __ movdqu(Address(rsp, 0), xmm6);
       
  4659     __ movdqu(Address(rsp, 2 * wordSize), xmm7);
       
  4660 #endif
       
  4661       __ fast_exp(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
  4569       __ fast_exp(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
  4662 
       
  4663 #ifdef _WIN64
       
  4664     // restore xmm regs belonging to calling function
       
  4665       __ movdqu(xmm6, Address(rsp, 0));
       
  4666       __ movdqu(xmm7, Address(rsp, 2 * wordSize));
       
  4667       __ addptr(rsp, 4 * wordSize);
       
  4668 #endif
       
  4669 
  4570 
  4670     __ leave(); // required for proper stackwalking of RuntimeStub frame
  4571     __ leave(); // required for proper stackwalking of RuntimeStub frame
  4671     __ ret(0);
  4572     __ ret(0);
  4672 
  4573 
  4673     return start;
  4574     return start;
  4691     const Register tmp2 = r8;
  4592     const Register tmp2 = r8;
  4692 
  4593 
  4693     BLOCK_COMMENT("Entry:");
  4594     BLOCK_COMMENT("Entry:");
  4694     __ enter(); // required for proper stackwalking of RuntimeStub frame
  4595     __ enter(); // required for proper stackwalking of RuntimeStub frame
  4695 
  4596 
  4696 #ifdef _WIN64
       
  4697     // save the xmm registers which must be preserved 6-7
       
  4698     __ subptr(rsp, 4 * wordSize);
       
  4699     __ movdqu(Address(rsp, 0), xmm6);
       
  4700     __ movdqu(Address(rsp, 2 * wordSize), xmm7);
       
  4701 #endif
       
  4702     __ fast_log(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2);
  4597     __ fast_log(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2);
  4703 
       
  4704 #ifdef _WIN64
       
  4705     // restore xmm regs belonging to calling function
       
  4706     __ movdqu(xmm6, Address(rsp, 0));
       
  4707     __ movdqu(xmm7, Address(rsp, 2 * wordSize));
       
  4708     __ addptr(rsp, 4 * wordSize);
       
  4709 #endif
       
  4710 
  4598 
  4711     __ leave(); // required for proper stackwalking of RuntimeStub frame
  4599     __ leave(); // required for proper stackwalking of RuntimeStub frame
  4712     __ ret(0);
  4600     __ ret(0);
  4713 
  4601 
  4714     return start;
  4602     return start;
  4731     const Register tmp = r11;
  4619     const Register tmp = r11;
  4732 
  4620 
  4733     BLOCK_COMMENT("Entry:");
  4621     BLOCK_COMMENT("Entry:");
  4734     __ enter(); // required for proper stackwalking of RuntimeStub frame
  4622     __ enter(); // required for proper stackwalking of RuntimeStub frame
  4735 
  4623 
  4736 #ifdef _WIN64
       
  4737     // save the xmm registers which must be preserved 6-7
       
  4738     __ subptr(rsp, 4 * wordSize);
       
  4739     __ movdqu(Address(rsp, 0), xmm6);
       
  4740     __ movdqu(Address(rsp, 2 * wordSize), xmm7);
       
  4741 #endif
       
  4742     __ fast_log10(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
  4624     __ fast_log10(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
  4743 
       
  4744 #ifdef _WIN64
       
  4745     // restore xmm regs belonging to calling function
       
  4746     __ movdqu(xmm6, Address(rsp, 0));
       
  4747     __ movdqu(xmm7, Address(rsp, 2 * wordSize));
       
  4748     __ addptr(rsp, 4 * wordSize);
       
  4749 #endif
       
  4750 
  4625 
  4751     __ leave(); // required for proper stackwalking of RuntimeStub frame
  4626     __ leave(); // required for proper stackwalking of RuntimeStub frame
  4752     __ ret(0);
  4627     __ ret(0);
  4753 
  4628 
  4754     return start;
  4629     return start;
  4774     const Register tmp4 = r11;
  4649     const Register tmp4 = r11;
  4775 
  4650 
  4776     BLOCK_COMMENT("Entry:");
  4651     BLOCK_COMMENT("Entry:");
  4777     __ enter(); // required for proper stackwalking of RuntimeStub frame
  4652     __ enter(); // required for proper stackwalking of RuntimeStub frame
  4778 
  4653 
  4779 #ifdef _WIN64
       
  4780     // save the xmm registers which must be preserved 6-7
       
  4781     __ subptr(rsp, 4 * wordSize);
       
  4782     __ movdqu(Address(rsp, 0), xmm6);
       
  4783     __ movdqu(Address(rsp, 2 * wordSize), xmm7);
       
  4784 #endif
       
  4785     __ fast_pow(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4);
  4654     __ fast_pow(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4);
  4786 
       
  4787 #ifdef _WIN64
       
  4788     // restore xmm regs belonging to calling function
       
  4789     __ movdqu(xmm6, Address(rsp, 0));
       
  4790     __ movdqu(xmm7, Address(rsp, 2 * wordSize));
       
  4791     __ addptr(rsp, 4 * wordSize);
       
  4792 #endif
       
  4793 
  4655 
  4794     __ leave(); // required for proper stackwalking of RuntimeStub frame
  4656     __ leave(); // required for proper stackwalking of RuntimeStub frame
  4795     __ ret(0);
  4657     __ ret(0);
  4796 
  4658 
  4797     return start;
  4659     return start;
  4820     __ enter(); // required for proper stackwalking of RuntimeStub frame
  4682     __ enter(); // required for proper stackwalking of RuntimeStub frame
  4821 
  4683 
  4822 #ifdef _WIN64
  4684 #ifdef _WIN64
  4823     __ push(rsi);
  4685     __ push(rsi);
  4824     __ push(rdi);
  4686     __ push(rdi);
  4825     // save the xmm registers which must be preserved 6-7
       
  4826     __ subptr(rsp, 4 * wordSize);
       
  4827     __ movdqu(Address(rsp, 0), xmm6);
       
  4828     __ movdqu(Address(rsp, 2 * wordSize), xmm7);
       
  4829 #endif
  4687 #endif
  4830     __ fast_sin(x0, x1, x2, x3, x4, x5, x6, x7, rax, rbx, rcx, rdx, tmp1, tmp2, tmp3, tmp4);
  4688     __ fast_sin(x0, x1, x2, x3, x4, x5, x6, x7, rax, rbx, rcx, rdx, tmp1, tmp2, tmp3, tmp4);
  4831 
  4689 
  4832 #ifdef _WIN64
  4690 #ifdef _WIN64
  4833     // restore xmm regs belonging to calling function
       
  4834     __ movdqu(xmm6, Address(rsp, 0));
       
  4835     __ movdqu(xmm7, Address(rsp, 2 * wordSize));
       
  4836     __ addptr(rsp, 4 * wordSize);
       
  4837     __ pop(rdi);
  4691     __ pop(rdi);
  4838     __ pop(rsi);
  4692     __ pop(rsi);
  4839 #endif
  4693 #endif
  4840 
  4694 
  4841     __ leave(); // required for proper stackwalking of RuntimeStub frame
  4695     __ leave(); // required for proper stackwalking of RuntimeStub frame
  4867     __ enter(); // required for proper stackwalking of RuntimeStub frame
  4721     __ enter(); // required for proper stackwalking of RuntimeStub frame
  4868 
  4722 
  4869 #ifdef _WIN64
  4723 #ifdef _WIN64
  4870     __ push(rsi);
  4724     __ push(rsi);
  4871     __ push(rdi);
  4725     __ push(rdi);
  4872     // save the xmm registers which must be preserved 6-7
       
  4873     __ subptr(rsp, 4 * wordSize);
       
  4874     __ movdqu(Address(rsp, 0), xmm6);
       
  4875     __ movdqu(Address(rsp, 2 * wordSize), xmm7);
       
  4876 #endif
  4726 #endif
  4877     __ fast_cos(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4);
  4727     __ fast_cos(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4);
  4878 
  4728 
  4879 #ifdef _WIN64
  4729 #ifdef _WIN64
  4880     // restore xmm regs belonging to calling function
       
  4881     __ movdqu(xmm6, Address(rsp, 0));
       
  4882     __ movdqu(xmm7, Address(rsp, 2 * wordSize));
       
  4883     __ addptr(rsp, 4 * wordSize);
       
  4884     __ pop(rdi);
  4730     __ pop(rdi);
  4885     __ pop(rsi);
  4731     __ pop(rsi);
  4886 #endif
  4732 #endif
  4887 
  4733 
  4888     __ leave(); // required for proper stackwalking of RuntimeStub frame
  4734     __ leave(); // required for proper stackwalking of RuntimeStub frame
  4914     __ enter(); // required for proper stackwalking of RuntimeStub frame
  4760     __ enter(); // required for proper stackwalking of RuntimeStub frame
  4915 
  4761 
  4916 #ifdef _WIN64
  4762 #ifdef _WIN64
  4917     __ push(rsi);
  4763     __ push(rsi);
  4918     __ push(rdi);
  4764     __ push(rdi);
  4919     // save the xmm registers which must be preserved 6-7
       
  4920     __ subptr(rsp, 4 * wordSize);
       
  4921     __ movdqu(Address(rsp, 0), xmm6);
       
  4922     __ movdqu(Address(rsp, 2 * wordSize), xmm7);
       
  4923 #endif
  4765 #endif
  4924     __ fast_tan(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4);
  4766     __ fast_tan(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4);
  4925 
  4767 
  4926 #ifdef _WIN64
  4768 #ifdef _WIN64
  4927     // restore xmm regs belonging to calling function
       
  4928     __ movdqu(xmm6, Address(rsp, 0));
       
  4929     __ movdqu(xmm7, Address(rsp, 2 * wordSize));
       
  4930     __ addptr(rsp, 4 * wordSize);
       
  4931     __ pop(rdi);
  4769     __ pop(rdi);
  4932     __ pop(rsi);
  4770     __ pop(rsi);
  4933 #endif
  4771 #endif
  4934 
  4772 
  4935     __ leave(); // required for proper stackwalking of RuntimeStub frame
  4773     __ leave(); // required for proper stackwalking of RuntimeStub frame