hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp
changeset 34203 6817dadf6c7e
parent 34185 ee71c590a456
child 34207 a5f1c458b56e
equal deleted inserted replaced
34202:5d19ca9c25a8 34203:6817dadf6c7e
  3947 void MacroAssembler::testl(Register dst, AddressLiteral src) {
  3947 void MacroAssembler::testl(Register dst, AddressLiteral src) {
         // Asserts that the literal address is reachable, then emits testl of dst
         // against the Address form of src.
  3948   assert(reachable(src), "Address should be reachable");
  3948   assert(reachable(src), "Address should be reachable");
  3949   testl(dst, as_Address(src));
  3949   testl(dst, as_Address(src));
  3950 }
  3950 }
  3951 
  3951 
       
  3952 void MacroAssembler::pcmpeqb(XMMRegister dst, XMMRegister src) {
       
         // Emits pcmpeqb(dst, src). When neither supports_avxonly() nor
         // supports_avx512bw() holds and an operand encoding is >= 16, the
         // instruction is staged through xmm0 (and xmm1), which are saved to and
         // reloaded from 64-byte stack slots around it.
  3953   int dst_enc = dst->encoding();
       
  3954   int src_enc = src->encoding();
       
  3955   if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
       
  3956     Assembler::pcmpeqb(dst, src);
       
  3957   } else if ((dst_enc < 16) && (src_enc < 16)) {
       
  3958     Assembler::pcmpeqb(dst, src);
       
  3959   } else if (src_enc < 16) {
       
           // Only dst_enc >= 16: save xmm0, compute in xmm0, copy the result to dst, reload xmm0.
  3960     subptr(rsp, 64);
       
  3961     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       
  3962     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
       
  3963     Assembler::pcmpeqb(xmm0, src);
       
  3964     movdqu(dst, xmm0);
       
  3965     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       
  3966     addptr(rsp, 64);
       
  3967   } else if (dst_enc < 16) {
       
           // Only src_enc >= 16: save xmm0 and use it as a stand-in for src.
  3968     subptr(rsp, 64);
       
  3969     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       
  3970     evmovdqul(xmm0, src, Assembler::AVX_512bit);
       
  3971     Assembler::pcmpeqb(dst, xmm0);
       
  3972     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       
  3973     addptr(rsp, 64);
       
  3974   } else {
       
           // Both encodings >= 16: save xmm0 then xmm1 in two stack slots, compute
           // in xmm1 with xmm0 holding src, copy back, and reload in reverse order.
  3975     subptr(rsp, 64);
       
  3976     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       
  3977     subptr(rsp, 64);
       
  3978     evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
       
  3979     movdqu(xmm0, src);
       
  3980     movdqu(xmm1, dst);
       
  3981     Assembler::pcmpeqb(xmm1, xmm0);
       
  3982     movdqu(dst, xmm1);
       
  3983     evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
       
  3984     addptr(rsp, 64);
       
  3985     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       
  3986     addptr(rsp, 64);
       
  3987   }
       
  3988 }
       
  3989 
       
  3990 void MacroAssembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
       
         // Emits pcmpeqw(dst, src). When neither supports_avxonly() nor
         // supports_avx512bw() holds and an operand encoding is >= 16, the
         // instruction is staged through xmm0 (and xmm1), which are saved to and
         // reloaded from 64-byte stack slots around it.
  3991   int dst_enc = dst->encoding();
       
  3992   int src_enc = src->encoding();
       
  3993   if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
       
  3994     Assembler::pcmpeqw(dst, src);
       
  3995   } else if ((dst_enc < 16) && (src_enc < 16)) {
       
  3996     Assembler::pcmpeqw(dst, src);
       
  3997   } else if (src_enc < 16) {
       
           // Only dst_enc >= 16: save xmm0, compute in xmm0, copy the result to dst, reload xmm0.
  3998     subptr(rsp, 64);
       
  3999     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       
  4000     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
       
  4001     Assembler::pcmpeqw(xmm0, src);
       
  4002     movdqu(dst, xmm0);
       
  4003     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       
  4004     addptr(rsp, 64);
       
  4005   } else if (dst_enc < 16) {
       
           // Only src_enc >= 16: save xmm0 and use it as a stand-in for src.
  4006     subptr(rsp, 64);
       
  4007     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       
  4008     evmovdqul(xmm0, src, Assembler::AVX_512bit);
       
  4009     Assembler::pcmpeqw(dst, xmm0);
       
  4010     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       
  4011     addptr(rsp, 64);
       
  4012   } else {
       
           // Both encodings >= 16: save xmm0 then xmm1 in two stack slots, compute
           // in xmm1 with xmm0 holding src, copy back, and reload in reverse order.
  4013     subptr(rsp, 64);
       
  4014     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       
  4015     subptr(rsp, 64);
       
  4016     evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
       
  4017     movdqu(xmm0, src);
       
  4018     movdqu(xmm1, dst);
       
  4019     Assembler::pcmpeqw(xmm1, xmm0);
       
  4020     movdqu(dst, xmm1);
       
  4021     evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
       
  4022     addptr(rsp, 64);
       
  4023     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       
  4024     addptr(rsp, 64);
       
  4025   }
       
  4026 }
       
  4027 
       
  4028 void MacroAssembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
       
         // Emits pcmpestri(dst, src, imm8) with a memory source. If dst's encoding
         // is >= 16 the instruction is issued on xmm0 instead, with xmm0 saved to
         // and reloaded from a 64-byte stack slot.
  4029   int dst_enc = dst->encoding();
       
  4030   if (dst_enc < 16) {
       
  4031     Assembler::pcmpestri(dst, src, imm8);
       
  4032   } else {
       
           // NOTE(review): rsp is lowered by 64 before src is used; if src were
           // rsp-relative its offset would no longer match - confirm against callers.
  4033     subptr(rsp, 64);
       
  4034     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       
  4035     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
       
  4036     Assembler::pcmpestri(xmm0, src, imm8);
       
  4037     movdqu(dst, xmm0);
       
  4038     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       
  4039     addptr(rsp, 64);
       
  4040   }
       
  4041 }
       
  4042 
       
  4043 void MacroAssembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
       
         // Emits pcmpestri(dst, src, imm8) with register operands. Any operand
         // whose encoding is >= 16 is staged through xmm0 (and xmm1), which are
         // saved to and reloaded from 64-byte stack slots around the instruction.
  4044   int dst_enc = dst->encoding();
       
  4045   int src_enc = src->encoding();
       
  4046   if ((dst_enc < 16) && (src_enc < 16)) {
       
  4047     Assembler::pcmpestri(dst, src, imm8);
       
  4048   } else if (src_enc < 16) {
       
           // Only dst_enc >= 16: run the instruction on xmm0 and copy xmm0 back to dst.
  4049     subptr(rsp, 64);
       
  4050     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       
  4051     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
       
  4052     Assembler::pcmpestri(xmm0, src, imm8);
       
  4053     movdqu(dst, xmm0);
       
  4054     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       
  4055     addptr(rsp, 64);
       
  4056   } else if (dst_enc < 16) {
       
           // Only src_enc >= 16: xmm0 stands in for src.
  4057     subptr(rsp, 64);
       
  4058     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       
  4059     evmovdqul(xmm0, src, Assembler::AVX_512bit);
       
  4060     Assembler::pcmpestri(dst, xmm0, imm8);
       
  4061     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       
  4062     addptr(rsp, 64);
       
  4063   } else {
       
           // Both encodings >= 16: xmm0 stands in for src and xmm1 for dst; both
           // are saved first and reloaded in reverse order afterwards.
  4064     subptr(rsp, 64);
       
  4065     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       
  4066     subptr(rsp, 64);
       
  4067     evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
       
  4068     movdqu(xmm0, src);
       
  4069     movdqu(xmm1, dst);
       
  4070     Assembler::pcmpestri(xmm1, xmm0, imm8);
       
  4071     movdqu(dst, xmm1);
       
  4072     evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
       
  4073     addptr(rsp, 64);
       
  4074     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       
  4075     addptr(rsp, 64);
       
  4076   }
       
  4077 }
       
  4078 
       
  4079 void MacroAssembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
       
         // Emits pmovzxbw(dst, src). When neither supports_avxonly() nor
         // supports_avx512bw() holds and an operand encoding is >= 16, the
         // instruction is staged through xmm0 (and xmm1), which are saved to and
         // reloaded from 64-byte stack slots around it.
  4080   int dst_enc = dst->encoding();
       
  4081   int src_enc = src->encoding();
       
  4082   if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
       
  4083     Assembler::pmovzxbw(dst, src);
       
  4084   } else if ((dst_enc < 16) && (src_enc < 16)) {
       
  4085     Assembler::pmovzxbw(dst, src);
       
  4086   } else if (src_enc < 16) {
       
           // Only dst_enc >= 16: save xmm0, compute in xmm0, copy the result to dst, reload xmm0.
  4087     subptr(rsp, 64);
       
  4088     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       
  4089     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
       
  4090     Assembler::pmovzxbw(xmm0, src);
       
  4091     movdqu(dst, xmm0);
       
  4092     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       
  4093     addptr(rsp, 64);
       
  4094   } else if (dst_enc < 16) {
       
           // Only src_enc >= 16: save xmm0 and use it as a stand-in for src.
  4095     subptr(rsp, 64);
       
  4096     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       
  4097     evmovdqul(xmm0, src, Assembler::AVX_512bit);
       
  4098     Assembler::pmovzxbw(dst, xmm0);
       
  4099     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       
  4100     addptr(rsp, 64);
       
  4101   } else {
       
           // Both encodings >= 16: save xmm0 then xmm1 in two stack slots, compute
           // in xmm1 with xmm0 holding src, copy back, and reload in reverse order.
  4102     subptr(rsp, 64);
       
  4103     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       
  4104     subptr(rsp, 64);
       
  4105     evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
       
  4106     movdqu(xmm0, src);
       
  4107     movdqu(xmm1, dst);
       
  4108     Assembler::pmovzxbw(xmm1, xmm0);
       
  4109     movdqu(dst, xmm1);
       
  4110     evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
       
  4111     addptr(rsp, 64);
       
  4112     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       
  4113     addptr(rsp, 64);
       
  4114   }
       
  4115 }
       
  4116 
       
  4117 void MacroAssembler::pmovzxbw(XMMRegister dst, Address src) {
       
         // Emits pmovzxbw(dst, src) with a memory source. When neither
         // supports_avxonly() nor supports_avx512bw() holds and dst's encoding is
         // >= 16, the instruction is issued on xmm0 (saved/reloaded via a 64-byte
         // stack slot) and the result is copied to dst.
  4118   int dst_enc = dst->encoding();
       
  4119   if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
       
  4120     Assembler::pmovzxbw(dst, src);
       
  4121   } else if (dst_enc < 16) {
       
  4122     Assembler::pmovzxbw(dst, src);
       
  4123   } else {
       
           // NOTE(review): rsp is lowered by 64 before src is used; if src were
           // rsp-relative its offset would no longer match - confirm against callers.
  4124     subptr(rsp, 64);
       
  4125     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       
  4126     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
       
  4127     Assembler::pmovzxbw(xmm0, src);
       
  4128     movdqu(dst, xmm0);
       
  4129     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       
  4130     addptr(rsp, 64);
       
  4131   }
       
  4132 }
       
  4133 
       
  4134 void MacroAssembler::pmovmskb(Register dst, XMMRegister src) {
       
         // Emits pmovmskb(dst, src). If src's encoding is >= 16, src is first
         // copied into xmm0 (saved/reloaded via a 64-byte stack slot) and the
         // instruction reads xmm0 instead.
  4135   int src_enc = src->encoding();
       
  4136   if (src_enc < 16) {
       
  4137     Assembler::pmovmskb(dst, src);
       
  4138   } else {
       
  4139     subptr(rsp, 64);
       
  4140     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       
  4141     evmovdqul(xmm0, src, Assembler::AVX_512bit);
       
  4142     Assembler::pmovmskb(dst, xmm0);
       
  4143     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       
  4144     addptr(rsp, 64);
       
  4145   }
       
  4146 }
       
  4147 
       
  4148 void MacroAssembler::ptest(XMMRegister dst, XMMRegister src) {
       
         // Emits ptest(dst, src). Any operand whose encoding is >= 16 is copied
         // into xmm0/xmm1 first; those registers are saved to and reloaded from
         // 64-byte stack slots. No branch copies a value back into dst.
  4149   int dst_enc = dst->encoding();
       
  4150   int src_enc = src->encoding();
       
  4151   if ((dst_enc < 16) && (src_enc < 16)) {
       
  4152     Assembler::ptest(dst, src);
       
  4153   } else if (src_enc < 16) {
       
           // Only dst_enc >= 16: xmm0 stands in for dst.
  4154     subptr(rsp, 64);
       
  4155     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       
  4156     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
       
  4157     Assembler::ptest(xmm0, src);
       
  4158     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       
  4159     addptr(rsp, 64);
       
  4160   } else if (dst_enc < 16) {
       
           // Only src_enc >= 16: xmm0 stands in for src.
  4161     subptr(rsp, 64);
       
  4162     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       
  4163     evmovdqul(xmm0, src, Assembler::AVX_512bit);
       
  4164     Assembler::ptest(dst, xmm0);
       
  4165     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       
  4166     addptr(rsp, 64);
       
  4167   } else {
       
           // Both encodings >= 16: xmm0 stands in for src and xmm1 for dst; both
           // are saved first and reloaded in reverse order afterwards.
  4168     subptr(rsp, 64);
       
  4169     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       
  4170     subptr(rsp, 64);
       
  4171     evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
       
  4172     movdqu(xmm0, src);
       
  4173     movdqu(xmm1, dst);
       
  4174     Assembler::ptest(xmm1, xmm0);
       
  4175     evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
       
  4176     addptr(rsp, 64);
       
  4177     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       
  4178     addptr(rsp, 64);
       
  4179   }
       
  4180 }
       
  4181 
  3952 void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) {
  4182 void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) {
  3953   if (reachable(src)) {
  4183   if (reachable(src)) {
  3954     Assembler::sqrtsd(dst, as_Address(src));
  4184     Assembler::sqrtsd(dst, as_Address(src));
  3955   } else {
  4185   } else {
  3956     lea(rscratch1, src);
  4186     lea(rscratch1, src);
  4254     Assembler::vpaddw(xmm0, xmm0, src, vector_len);
  4484     Assembler::vpaddw(xmm0, xmm0, src, vector_len);
  4255     evmovdqul(xmm0, nds, Assembler::AVX_512bit);
  4485     evmovdqul(xmm0, nds, Assembler::AVX_512bit);
  4256   }
  4486   }
  4257 }
  4487 }
  4258 
  4488 
       
  4489 void MacroAssembler::vpbroadcastw(XMMRegister dst, XMMRegister src) {
       
         // Emits vpbroadcastw(dst, src). When neither supports_avxonly() nor
         // supports_avx512bw() holds and an operand encoding is >= 16, the
         // instruction is staged through xmm0 (and xmm1), which are saved to and
         // reloaded from 64-byte stack slots around it.
         // NOTE(review): the staged branches copy the result back with movdqu;
         // confirm that covers the full width the broadcast writes.
  4490   int dst_enc = dst->encoding();
       
  4491   int src_enc = src->encoding();
       
  4492   if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
       
  4493     Assembler::vpbroadcastw(dst, src);
       
  4494   } else if ((dst_enc < 16) && (src_enc < 16)) {
       
  4495     Assembler::vpbroadcastw(dst, src);
       
  4496   } else if (src_enc < 16) {
       
           // Only dst_enc >= 16: save xmm0, compute in xmm0, copy the result to dst, reload xmm0.
  4497     subptr(rsp, 64);
       
  4498     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       
  4499     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
       
  4500     Assembler::vpbroadcastw(xmm0, src);
       
  4501     movdqu(dst, xmm0);
       
  4502     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       
  4503     addptr(rsp, 64);
       
  4504   } else if (dst_enc < 16) {
       
           // Only src_enc >= 16: save xmm0 and use it as a stand-in for src.
  4505     subptr(rsp, 64);
       
  4506     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       
  4507     evmovdqul(xmm0, src, Assembler::AVX_512bit);
       
  4508     Assembler::vpbroadcastw(dst, xmm0);
       
  4509     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       
  4510     addptr(rsp, 64);
       
  4511   } else {
       
           // Both encodings >= 16: save xmm0 then xmm1 in two stack slots, compute
           // in xmm1 with xmm0 holding src, copy back, and reload in reverse order.
  4512     subptr(rsp, 64);
       
  4513     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       
  4514     subptr(rsp, 64);
       
  4515     evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
       
  4516     movdqu(xmm0, src);
       
  4517     movdqu(xmm1, dst);
       
  4518     Assembler::vpbroadcastw(xmm1, xmm0);
       
  4519     movdqu(dst, xmm1);
       
  4520     evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
       
  4521     addptr(rsp, 64);
       
  4522     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       
  4523     addptr(rsp, 64);
       
  4524   }
       
  4525 }
       
  4526 
       
  4527 void MacroAssembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
       
         // Emits vpcmpeqb(dst, nds, src, vector_len); dst and nds must have the
         // same encoding (asserted below). When neither supports_avxonly() nor
         // supports_avx512bw() holds and an operand encoding is >= 16, the
         // instruction is staged through xmm0 (and xmm1), which are saved to and
         // reloaded from 64-byte stack slots around it.
         // NOTE(review): the staged branches copy operands/results with movdqu
         // regardless of vector_len; confirm that is wide enough for 256-bit use.
  4528   int dst_enc = dst->encoding();
       
  4529   int nds_enc = nds->encoding();
       
  4530   int src_enc = src->encoding();
       
  4531   assert(dst_enc == nds_enc, "");
       
  4532   if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
       
  4533     Assembler::vpcmpeqb(dst, nds, src, vector_len);
       
  4534   } else if ((dst_enc < 16) && (src_enc < 16)) {
       
  4535     Assembler::vpcmpeqb(dst, nds, src, vector_len);
       
  4536   } else if (src_enc < 16) {
       
           // Only dst_enc >= 16: save xmm0, compute in xmm0, copy the result to dst, reload xmm0.
  4537     subptr(rsp, 64);
       
  4538     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       
  4539     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
       
  4540     Assembler::vpcmpeqb(xmm0, xmm0, src, vector_len);
       
  4541     movdqu(dst, xmm0);
       
  4542     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       
  4543     addptr(rsp, 64);
       
  4544   } else if (dst_enc < 16) {
       
           // Only src_enc >= 16: save xmm0 and use it as a stand-in for src.
  4545     subptr(rsp, 64);
       
  4546     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       
  4547     evmovdqul(xmm0, src, Assembler::AVX_512bit);
       
  4548     Assembler::vpcmpeqb(dst, dst, xmm0, vector_len);
       
  4549     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       
  4550     addptr(rsp, 64);
       
  4551   } else {
       
           // Both encodings >= 16: save xmm0 then xmm1 in two stack slots, compute
           // in xmm1 with xmm0 holding src, copy back, and reload in reverse order.
  4552     subptr(rsp, 64);
       
  4553     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       
  4554     subptr(rsp, 64);
       
  4555     evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
       
  4556     movdqu(xmm0, src);
       
  4557     movdqu(xmm1, dst);
       
  4558     Assembler::vpcmpeqb(xmm1, xmm1, xmm0, vector_len);
       
  4559     movdqu(dst, xmm1);
       
  4560     evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
       
  4561     addptr(rsp, 64);
       
  4562     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       
  4563     addptr(rsp, 64);
       
  4564   }
       
  4565 }
       
  4566 
       
  4567 void MacroAssembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
       
         // Emits vpcmpeqw(dst, nds, src, vector_len); dst and nds must have the
         // same encoding (asserted below). When neither supports_avxonly() nor
         // supports_avx512bw() holds and an operand encoding is >= 16, the
         // instruction is staged through xmm0 (and xmm1), which are saved to and
         // reloaded from 64-byte stack slots around it.
         // NOTE(review): the staged branches copy operands/results with movdqu
         // regardless of vector_len; confirm that is wide enough for 256-bit use.
  4568   int dst_enc = dst->encoding();
       
  4569   int nds_enc = nds->encoding();
       
  4570   int src_enc = src->encoding();
       
  4571   assert(dst_enc == nds_enc, "");
       
  4572   if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
       
  4573     Assembler::vpcmpeqw(dst, nds, src, vector_len);
       
  4574   } else if ((dst_enc < 16) && (src_enc < 16)) {
       
  4575     Assembler::vpcmpeqw(dst, nds, src, vector_len);
       
  4576   } else if (src_enc < 16) {
       
           // Only dst_enc >= 16: save xmm0, compute in xmm0, copy the result to dst, reload xmm0.
  4577     subptr(rsp, 64);
       
  4578     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       
  4579     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
       
  4580     Assembler::vpcmpeqw(xmm0, xmm0, src, vector_len);
       
  4581     movdqu(dst, xmm0);
       
  4582     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       
  4583     addptr(rsp, 64);
       
  4584   } else if (dst_enc < 16) {
       
           // Only src_enc >= 16: save xmm0 and use it as a stand-in for src.
  4585     subptr(rsp, 64);
       
  4586     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       
  4587     evmovdqul(xmm0, src, Assembler::AVX_512bit);
       
  4588     Assembler::vpcmpeqw(dst, dst, xmm0, vector_len);
       
  4589     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       
  4590     addptr(rsp, 64);
       
  4591   } else {
       
           // Both encodings >= 16: save xmm0 then xmm1 in two stack slots, compute
           // in xmm1 with xmm0 holding src, copy back, and reload in reverse order.
  4592     subptr(rsp, 64);
       
  4593     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       
  4594     subptr(rsp, 64);
       
  4595     evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
       
  4596     movdqu(xmm0, src);
       
  4597     movdqu(xmm1, dst);
       
  4598     Assembler::vpcmpeqw(xmm1, xmm1, xmm0, vector_len);
       
  4599     movdqu(dst, xmm1);
       
  4600     evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
       
  4601     addptr(rsp, 64);
       
  4602     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       
  4603     addptr(rsp, 64);
       
  4604   }
       
  4605 }
       
  4606 
       
  4607 void MacroAssembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
       
         // Emits vpmovzxbw(dst, src, vector_len) with a memory source. When
         // neither supports_avxonly() nor supports_avx512bw() holds and dst's
         // encoding is >= 16, the instruction is issued on xmm0 (saved/reloaded
         // via a 64-byte stack slot) and the result is copied to dst.
  4608   int dst_enc = dst->encoding();
       
  4609   if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
       
  4610     Assembler::vpmovzxbw(dst, src, vector_len);
       
  4611   } else if (dst_enc < 16) {
       
  4612     Assembler::vpmovzxbw(dst, src, vector_len);
       
  4613   } else {
       
           // NOTE(review): the copy-back below uses movdqu regardless of
           // vector_len; confirm it is wide enough when vector_len is AVX_256bit.
           // NOTE(review): rsp is lowered by 64 before src is used; if src were
           // rsp-relative its offset would no longer match - confirm against callers.
  4614     subptr(rsp, 64);
       
  4615     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       
  4616     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
       
  4617     Assembler::vpmovzxbw(xmm0, src, vector_len);
       
  4618     movdqu(dst, xmm0);
       
  4619     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       
  4620     addptr(rsp, 64);
       
  4621   }
       
  4622 }
       
  4623 
       
  4624 void MacroAssembler::vpmovmskb(Register dst, XMMRegister src) {
       
         // Emits vpmovmskb(dst, src). If src's encoding is >= 16, src is first
         // copied into xmm0 (saved/reloaded via a 64-byte stack slot) and the
         // instruction reads xmm0 instead.
  4625   int src_enc = src->encoding();
       
  4626   if (src_enc < 16) {
       
  4627     Assembler::vpmovmskb(dst, src);
       
  4628   } else {
       
  4629     subptr(rsp, 64);
       
  4630     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       
  4631     evmovdqul(xmm0, src, Assembler::AVX_512bit);
       
  4632     Assembler::vpmovmskb(dst, xmm0);
       
  4633     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       
  4634     addptr(rsp, 64);
       
  4635   }
       
  4636 }
       
  4637 
       
  4638 void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
       
         // Emits vpmullw with register operands. With supports_avxonly() or
         // supports_avx512bw() the instruction is emitted as given. Otherwise every
         // branch multiplies dst by src in place (nds is not a multiplicand) and
         // registers with encoding >= 16 are routed through low registers.
         // NOTE(review): several branches overwrite nds; callers presumably treat
         // it as a scratch register on this path - confirm.
  4639   int dst_enc = dst->encoding();
       
  4640   int nds_enc = nds->encoding();
       
  4641   int src_enc = src->encoding();
       
  4642   if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
       
  4643     Assembler::vpmullw(dst, nds, src, vector_len);
       
  4644   } else if ((dst_enc < 16) && (src_enc < 16)) {
       
  4645     Assembler::vpmullw(dst, dst, src, vector_len);
       
  4646   } else if ((dst_enc < 16) && (nds_enc < 16)) {
       
  4647     // use nds as scratch for src
       
  4648     evmovdqul(nds, src, Assembler::AVX_512bit);
       
  4649     Assembler::vpmullw(dst, dst, nds, vector_len);
       
  4650   } else if ((src_enc < 16) && (nds_enc < 16)) {
       
  4651     // use nds as scratch for dst
       
  4652     evmovdqul(nds, dst, Assembler::AVX_512bit);
       
  4653     Assembler::vpmullw(nds, nds, src, vector_len);
       
  4654     evmovdqul(dst, nds, Assembler::AVX_512bit);
       
  4655   } else if (dst_enc < 16) {
       
  4656     // use nds as scratch for xmm0 to hold src
       
  4657     evmovdqul(nds, xmm0, Assembler::AVX_512bit);
       
  4658     evmovdqul(xmm0, src, Assembler::AVX_512bit);
       
  4659     Assembler::vpmullw(dst, dst, xmm0, vector_len);
       
  4660     evmovdqul(xmm0, nds, Assembler::AVX_512bit);
       
  4661   } else {
       
  4662     // worst case scenario, all regs are in the upper bank
       
           // xmm1 is saved in a 64-byte stack slot and xmm0 is parked in nds; the
           // product is formed in xmm0, copied to dst, then both are restored.
  4663     subptr(rsp, 64);
       
  4664     evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
       
  4665     evmovdqul(nds, xmm0, Assembler::AVX_512bit);
       
  4666     evmovdqul(xmm1, src, Assembler::AVX_512bit);
       
  4667     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
       
  4668     Assembler::vpmullw(xmm0, xmm0, xmm1, vector_len);
       
  4669     evmovdqul(dst, xmm0, Assembler::AVX_512bit);
       
  4670     evmovdqul(xmm0, nds, Assembler::AVX_512bit);
       
  4671     evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
       
  4672     addptr(rsp, 64);
       
  4673   }
       
  4674 }
       
  4675 
       
  4676 void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
       
         // Emits vpmullw with a memory source. With supports_avxonly() or
         // supports_avx512bw() the instruction is emitted as given. Otherwise
         // dst is multiplied by src in place (nds is not a multiplicand), and a
         // dst with encoding >= 16 is routed through a low register.
         // NOTE(review): the fallback branches overwrite nds; callers presumably
         // treat it as a scratch register on this path - confirm.
  4677   int dst_enc = dst->encoding();
       
  4678   int nds_enc = nds->encoding();
       
  4679   if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
       
  4680     Assembler::vpmullw(dst, nds, src, vector_len);
       
  4681   } else if (dst_enc < 16) {
       
  4682     Assembler::vpmullw(dst, dst, src, vector_len);
       
  4683   } else if (nds_enc < 16) {
       
  4684     // implies dst_enc in upper bank with nds as scratch
       
  4685     evmovdqul(nds, dst, Assembler::AVX_512bit);
       
  4686     Assembler::vpmullw(nds, nds, src, vector_len);
       
  4687     evmovdqul(dst, nds, Assembler::AVX_512bit);
       
  4688   } else {
       
  4689     // worst case scenario, all regs in upper bank
       
           // Park xmm0 in nds, form the product in xmm0, then restore xmm0.
  4690     evmovdqul(nds, xmm0, Assembler::AVX_512bit);
       
  4691     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
       
  4692     Assembler::vpmullw(xmm0, xmm0, src, vector_len);
       
           // Copy the product to dst before xmm0 is restored; without this the
           // result is discarded (the register overload does the same copy-back).
           evmovdqul(dst, xmm0, Assembler::AVX_512bit);
       
  4693     evmovdqul(xmm0, nds, Assembler::AVX_512bit);
       
  4694   }
       
  4695 }
       
  4696 
  4259 void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  4697 void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  4260   int dst_enc = dst->encoding();
  4698   int dst_enc = dst->encoding();
  4261   int nds_enc = nds->encoding();
  4699   int nds_enc = nds->encoding();
  4262   int src_enc = src->encoding();
  4700   int src_enc = src->encoding();
  4263   if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
  4701   if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
  4368   } else {
  4806   } else {
  4369     // worse case scenario, all regs in upper bank
  4807     // worse case scenario, all regs in upper bank
  4370     evmovdqul(nds, xmm0, Assembler::AVX_512bit);
  4808     evmovdqul(nds, xmm0, Assembler::AVX_512bit);
  4371     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
  4809     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
  4372     Assembler::vpsubw(xmm0, xmm0, src, vector_len);
  4810     Assembler::vpsubw(xmm0, xmm0, src, vector_len);
  4373     evmovdqul(xmm0, nds, Assembler::AVX_512bit);
       
  4374   }
       
  4375 }
       
  4376 
       
  4377 
       
  4378 void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
       
  4379   int dst_enc = dst->encoding();
       
  4380   int nds_enc = nds->encoding();
       
  4381   int src_enc = src->encoding();
       
  4382   if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
       
  4383     Assembler::vpmullw(dst, nds, src, vector_len);
       
  4384   } else if ((dst_enc < 16) && (src_enc < 16)) {
       
  4385     Assembler::vpmullw(dst, dst, src, vector_len);
       
  4386   } else if ((dst_enc < 16) && (nds_enc < 16)) {
       
  4387     // use nds as scratch for src
       
  4388     evmovdqul(nds, src, Assembler::AVX_512bit);
       
  4389     Assembler::vpmullw(dst, dst, nds, vector_len);
       
  4390   } else if ((src_enc < 16) && (nds_enc < 16)) {
       
  4391     // use nds as scratch for dst
       
  4392     evmovdqul(nds, dst, Assembler::AVX_512bit);
       
  4393     Assembler::vpmullw(nds, nds, src, vector_len);
       
  4394     evmovdqul(dst, nds, Assembler::AVX_512bit);
       
  4395   } else if (dst_enc < 16) {
       
  4396     // use nds as scatch for xmm0 to hold src
       
  4397     evmovdqul(nds, xmm0, Assembler::AVX_512bit);
       
  4398     evmovdqul(xmm0, src, Assembler::AVX_512bit);
       
  4399     Assembler::vpmullw(dst, dst, xmm0, vector_len);
       
  4400     evmovdqul(xmm0, nds, Assembler::AVX_512bit);
       
  4401   } else {
       
  4402     // worse case scenario, all regs are in the upper bank
       
  4403     subptr(rsp, 64);
       
  4404     evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
       
  4405     evmovdqul(nds, xmm0, Assembler::AVX_512bit);
       
  4406     evmovdqul(xmm1, src, Assembler::AVX_512bit);
       
  4407     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
       
  4408     Assembler::vpmullw(xmm0, xmm0, xmm1, vector_len);
       
  4409     evmovdqul(dst, xmm0, Assembler::AVX_512bit);
       
  4410     evmovdqul(xmm0, nds, Assembler::AVX_512bit);
       
  4411     evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
       
  4412     addptr(rsp, 64);
       
  4413   }
       
  4414 }
       
  4415 
       
  4416 void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
       
  4417   int dst_enc = dst->encoding();
       
  4418   int nds_enc = nds->encoding();
       
  4419   if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
       
  4420     Assembler::vpmullw(dst, nds, src, vector_len);
       
  4421   } else if (dst_enc < 16) {
       
  4422     Assembler::vpmullw(dst, dst, src, vector_len);
       
  4423   } else if (nds_enc < 16) {
       
  4424     // implies dst_enc in upper bank with src as scratch
       
  4425     evmovdqul(nds, dst, Assembler::AVX_512bit);
       
  4426     Assembler::vpmullw(nds, nds, src, vector_len);
       
  4427     evmovdqul(dst, nds, Assembler::AVX_512bit);
       
  4428   } else {
       
  4429     // worse case scenario, all regs in upper bank
       
  4430     evmovdqul(nds, xmm0, Assembler::AVX_512bit);
       
  4431     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
       
  4432     Assembler::vpmullw(xmm0, xmm0, src, vector_len);
       
  4433     evmovdqul(xmm0, nds, Assembler::AVX_512bit);
  4811     evmovdqul(xmm0, nds, Assembler::AVX_512bit);
  4434   }
  4812   }
  4435 }
  4813 }
  4436 
  4814 
  4437 void MacroAssembler::vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
  4815 void MacroAssembler::vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
  4633     // use nds as scratch for xmm0
  5011     // use nds as scratch for xmm0
  4634     evmovdqul(nds, xmm0, Assembler::AVX_512bit);
  5012     evmovdqul(nds, xmm0, Assembler::AVX_512bit);
  4635     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
  5013     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
  4636     Assembler::vpsllw(xmm0, xmm0, shift, vector_len);
  5014     Assembler::vpsllw(xmm0, xmm0, shift, vector_len);
  4637     evmovdqul(xmm0, nds, Assembler::AVX_512bit);
  5015     evmovdqul(xmm0, nds, Assembler::AVX_512bit);
       
  5016   }
       
  5017 }
       
  5018 
       
  5019 void MacroAssembler::vptest(XMMRegister dst, XMMRegister src) {
       
         // Emits vptest(dst, src). Any operand whose encoding is >= 16 is copied
         // into xmm0/xmm1 first; those registers are saved to and reloaded from
         // 64-byte stack slots. No branch copies a value back into dst.
         // NOTE(review): the both-upper branch loads the stand-ins with movdqu;
         // confirm that is wide enough for the width vptest examines.
  5020   int dst_enc = dst->encoding();
       
  5021   int src_enc = src->encoding();
       
  5022   if ((dst_enc < 16) && (src_enc < 16)) {
       
  5023     Assembler::vptest(dst, src);
       
  5024   } else if (src_enc < 16) {
       
           // Only dst_enc >= 16: xmm0 stands in for dst.
  5025     subptr(rsp, 64);
       
  5026     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       
  5027     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
       
  5028     Assembler::vptest(xmm0, src);
       
  5029     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       
  5030     addptr(rsp, 64);
       
  5031   } else if (dst_enc < 16) {
       
           // Only src_enc >= 16: xmm0 stands in for src.
  5032     subptr(rsp, 64);
       
  5033     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       
  5034     evmovdqul(xmm0, src, Assembler::AVX_512bit);
       
  5035     Assembler::vptest(dst, xmm0);
       
  5036     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       
  5037     addptr(rsp, 64);
       
  5038   } else {
       
           // Both encodings >= 16: xmm0 stands in for src and xmm1 for dst; both
           // are saved first and reloaded in reverse order afterwards.
  5039     subptr(rsp, 64);
       
  5040     evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
       
  5041     subptr(rsp, 64);
       
  5042     evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
       
  5043     movdqu(xmm0, src);
       
  5044     movdqu(xmm1, dst);
       
  5045     Assembler::vptest(xmm1, xmm0);
       
  5046     evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
       
  5047     addptr(rsp, 64);
       
  5048     evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
       
  5049     addptr(rsp, 64);
  4638   }
  5050   }
  4639 }
  5051 }
  4640 
  5052 
  4641 // This instruction exists within macros, ergo we cannot control its input
  5053 // This instruction exists within macros, ergo we cannot control its input
  4642 // when emitted through those patterns.
  5054 // when emitted through those patterns.
  7720     bind(COMPARE_WIDE_VECTORS_LOOP);
  8132     bind(COMPARE_WIDE_VECTORS_LOOP);
  7721     if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
  8133     if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
  7722       vmovdqu(vec1, Address(str1, result, scale));
  8134       vmovdqu(vec1, Address(str1, result, scale));
  7723       vpxor(vec1, Address(str2, result, scale));
  8135       vpxor(vec1, Address(str2, result, scale));
  7724     } else {
  8136     } else {
  7725       vpmovzxbw(vec1, Address(str1, result, scale1));
  8137       vpmovzxbw(vec1, Address(str1, result, scale1), Assembler::AVX_256bit);
  7726       vpxor(vec1, Address(str2, result, scale2));
  8138       vpxor(vec1, Address(str2, result, scale2));
  7727     }
  8139     }
  7728     vptest(vec1, vec1);
  8140     vptest(vec1, vec1);
  7729     jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
  8141     jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
  7730     addptr(result, stride2);
  8142     addptr(result, stride2);