  if (reachable(src)) {
    vaddss(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vaddss(dst, nds, Address(rscratch1, 0));
  }
}
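
// The two helpers below compute a scalar float/double absolute value as
// dst = nds & negate_field, where negate_field presumably points to a
// sign-bit mask (e.g. 0x7FFFFFFF for float) so that the AND clears the sign
// bit. The case analysis exists because on AVX-512 machines without
// AVX512VL/AVX512BW (e.g. Knights Landing) the upper bank of registers
// (xmm16-xmm31) is reachable only through EVEX-encoded instructions, while
// the legacy SSE/VEX encodings used by movss/movsd/vandps can name
// xmm0-xmm15 only, so upper-bank operands are first shuffled into the low
// bank. A minimal usage sketch (float_sign_mask is a hypothetical symbol):
//   vabsss(xmm1, xmm2, xmm3, ExternalAddress(float_sign_mask), Assembler::AVX_128bit);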
|
void MacroAssembler::vabsss(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len) {
  int dst_enc = dst->encoding();
  int nds_enc = nds->encoding();
  int src_enc = src->encoding();
  if ((dst_enc < 16) && (nds_enc < 16)) {
    vandps(dst, nds, negate_field, vector_len);
  } else if ((src_enc < 16) && (dst_enc < 16)) {
    movss(src, nds);
    vandps(dst, src, negate_field, vector_len);
  } else if (src_enc < 16) {
    movss(src, nds);
    vandps(src, src, negate_field, vector_len);
    movss(dst, src);
  } else if (dst_enc < 16) {
    movdqu(src, xmm0);
    movss(xmm0, nds);
    vandps(dst, xmm0, negate_field, vector_len);
    movdqu(xmm0, src);
  } else if (nds_enc < 16) {
    movdqu(src, xmm0);
    vandps(xmm0, nds, negate_field, vector_len);
    movss(dst, xmm0);
    movdqu(xmm0, src);
  } else {
    movdqu(src, xmm0);
    movss(xmm0, nds);
    vandps(xmm0, xmm0, negate_field, vector_len);
    movss(dst, xmm0);
    movdqu(xmm0, src);
  }
}

void MacroAssembler::vabssd(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len) {
  int dst_enc = dst->encoding();
  int nds_enc = nds->encoding();
  int src_enc = src->encoding();
  if ((dst_enc < 16) && (nds_enc < 16)) {
    vandpd(dst, nds, negate_field, vector_len);
  } else if ((src_enc < 16) && (dst_enc < 16)) {
    movsd(src, nds);
    vandpd(dst, src, negate_field, vector_len);
  } else if (src_enc < 16) {
    movsd(src, nds);
    vandpd(src, src, negate_field, vector_len);
    movsd(dst, src);
  } else if (dst_enc < 16) {
    movdqu(src, xmm0);
    movsd(xmm0, nds);
    vandpd(dst, xmm0, negate_field, vector_len);
    movdqu(xmm0, src);
  } else if (nds_enc < 16) {
    movdqu(src, xmm0);
    vandpd(xmm0, nds, negate_field, vector_len);
    movsd(dst, xmm0);
    movdqu(xmm0, src);
  } else {
    movdqu(src, xmm0);
    movsd(xmm0, nds);
    vandpd(xmm0, xmm0, negate_field, vector_len);
    movsd(dst, xmm0);
    movdqu(xmm0, src);
  }
}
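
// The vp* wrappers below share one dispatch pattern. Fast path: with AVX only
// (no AVX-512) or with AVX512BW, the byte/word instruction is emitted
// directly, because either no upper-bank registers exist or the EVEX form can
// encode them. Otherwise these byte/word operations have no usable EVEX form,
// so any operand encoded above xmm15 must be staged through the low bank
// first. The wrappers clobber nds freely, so the surrounding matcher rules
// presumably guarantee that nds is dead here; several branches also compute
// dst = dst op src, which assumes dst already carries the first source. In
// the worst case, when every operand is in the upper bank, xmm0/xmm1 are
// commandeered and xmm1 is spilled to a 64-byte stack slot (the width of one
// ZMM register), roughly:
//
//   subptr(rsp, 64);                                          // room for one ZMM
//   evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);  // save xmm1
//   ... stage operands in xmm0/xmm1, run the op, write back ...
//   evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);  // restore xmm1
//   addptr(rsp, 64);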
|
void MacroAssembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  int dst_enc = dst->encoding();
  int nds_enc = nds->encoding();
  int src_enc = src->encoding();
  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
    Assembler::vpaddb(dst, nds, src, vector_len);
  } else if ((dst_enc < 16) && (src_enc < 16)) {
    Assembler::vpaddb(dst, dst, src, vector_len);
  } else if ((dst_enc < 16) && (nds_enc < 16)) {
    // use nds as scratch for src
    evmovdqul(nds, src, Assembler::AVX_512bit);
    Assembler::vpaddb(dst, dst, nds, vector_len);
  } else if ((src_enc < 16) && (nds_enc < 16)) {
    // use nds as scratch for dst
    evmovdqul(nds, dst, Assembler::AVX_512bit);
    Assembler::vpaddb(nds, nds, src, vector_len);
    evmovdqul(dst, nds, Assembler::AVX_512bit);
  } else if (dst_enc < 16) {
    // use nds as scratch for xmm0 to hold src
    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, src, Assembler::AVX_512bit);
    Assembler::vpaddb(dst, dst, xmm0, vector_len);
    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
  } else {
    // worst case scenario, all regs are in the upper bank
    subptr(rsp, 64);
    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm1, src, Assembler::AVX_512bit);
    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
    Assembler::vpaddb(xmm0, xmm0, xmm1, vector_len);
    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
    addptr(rsp, 64);
  }
}
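
// Memory-source variant: src is an Address rather than a register, so only
// dst and nds can be in the upper bank and fewer staging cases are needed.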
|
void MacroAssembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  int dst_enc = dst->encoding();
  int nds_enc = nds->encoding();
  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
    Assembler::vpaddb(dst, nds, src, vector_len);
  } else if (dst_enc < 16) {
    Assembler::vpaddb(dst, dst, src, vector_len);
  } else if (nds_enc < 16) {
    // dst is in the upper bank, use nds as scratch for it
    evmovdqul(nds, dst, Assembler::AVX_512bit);
    Assembler::vpaddb(nds, nds, src, vector_len);
    evmovdqul(dst, nds, Assembler::AVX_512bit);
  } else {
    // worst case scenario, all regs in upper bank
    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
    Assembler::vpaddb(xmm0, xmm0, src, vector_len);
    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
  }
}

void MacroAssembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  int dst_enc = dst->encoding();
  int nds_enc = nds->encoding();
  int src_enc = src->encoding();
  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
    Assembler::vpaddw(dst, nds, src, vector_len);
  } else if ((dst_enc < 16) && (src_enc < 16)) {
    Assembler::vpaddw(dst, dst, src, vector_len);
  } else if ((dst_enc < 16) && (nds_enc < 16)) {
    // use nds as scratch for src
    evmovdqul(nds, src, Assembler::AVX_512bit);
    Assembler::vpaddw(dst, dst, nds, vector_len);
  } else if ((src_enc < 16) && (nds_enc < 16)) {
    // use nds as scratch for dst
    evmovdqul(nds, dst, Assembler::AVX_512bit);
    Assembler::vpaddw(nds, nds, src, vector_len);
    evmovdqul(dst, nds, Assembler::AVX_512bit);
  } else if (dst_enc < 16) {
    // use nds as scratch for xmm0 to hold src
    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, src, Assembler::AVX_512bit);
    Assembler::vpaddw(dst, dst, xmm0, vector_len);
    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
  } else {
    // worst case scenario, all regs are in the upper bank
    subptr(rsp, 64);
    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm1, src, Assembler::AVX_512bit);
    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
    Assembler::vpaddw(xmm0, xmm0, xmm1, vector_len);
    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
    addptr(rsp, 64);
  }
}

void MacroAssembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  int dst_enc = dst->encoding();
  int nds_enc = nds->encoding();
  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
    Assembler::vpaddw(dst, nds, src, vector_len);
  } else if (dst_enc < 16) {
    Assembler::vpaddw(dst, dst, src, vector_len);
  } else if (nds_enc < 16) {
    // dst is in the upper bank, use nds as scratch for it
    evmovdqul(nds, dst, Assembler::AVX_512bit);
    Assembler::vpaddw(nds, nds, src, vector_len);
    evmovdqul(dst, nds, Assembler::AVX_512bit);
  } else {
    // worst case scenario, all regs in upper bank
    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
    Assembler::vpaddw(xmm0, xmm0, src, vector_len);
    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
  }
}

void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  int dst_enc = dst->encoding();
  int nds_enc = nds->encoding();
  int src_enc = src->encoding();
  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
    Assembler::vpsubb(dst, nds, src, vector_len);
  } else if ((dst_enc < 16) && (src_enc < 16)) {
    Assembler::vpsubb(dst, dst, src, vector_len);
  } else if ((dst_enc < 16) && (nds_enc < 16)) {
    // use nds as scratch for src
    evmovdqul(nds, src, Assembler::AVX_512bit);
    Assembler::vpsubb(dst, dst, nds, vector_len);
  } else if ((src_enc < 16) && (nds_enc < 16)) {
    // use nds as scratch for dst
    evmovdqul(nds, dst, Assembler::AVX_512bit);
    Assembler::vpsubb(nds, nds, src, vector_len);
    evmovdqul(dst, nds, Assembler::AVX_512bit);
  } else if (dst_enc < 16) {
    // use nds as scratch for xmm0 to hold src
    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, src, Assembler::AVX_512bit);
    Assembler::vpsubb(dst, dst, xmm0, vector_len);
    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
  } else {
    // worst case scenario, all regs are in the upper bank
    subptr(rsp, 64);
    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm1, src, Assembler::AVX_512bit);
    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
    Assembler::vpsubb(xmm0, xmm0, xmm1, vector_len);
    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
    addptr(rsp, 64);
  }
}

void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  int dst_enc = dst->encoding();
  int nds_enc = nds->encoding();
  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
    Assembler::vpsubb(dst, nds, src, vector_len);
  } else if (dst_enc < 16) {
    Assembler::vpsubb(dst, dst, src, vector_len);
  } else if (nds_enc < 16) {
    // dst is in the upper bank, use nds as scratch for it
    evmovdqul(nds, dst, Assembler::AVX_512bit);
    Assembler::vpsubb(nds, nds, src, vector_len);
    evmovdqul(dst, nds, Assembler::AVX_512bit);
  } else {
    // worst case scenario, all regs in upper bank
    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
    Assembler::vpsubb(xmm0, xmm0, src, vector_len);
    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
  }
}

void MacroAssembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  int dst_enc = dst->encoding();
  int nds_enc = nds->encoding();
  int src_enc = src->encoding();
  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
    Assembler::vpsubw(dst, nds, src, vector_len);
  } else if ((dst_enc < 16) && (src_enc < 16)) {
    Assembler::vpsubw(dst, dst, src, vector_len);
  } else if ((dst_enc < 16) && (nds_enc < 16)) {
    // use nds as scratch for src
    evmovdqul(nds, src, Assembler::AVX_512bit);
    Assembler::vpsubw(dst, dst, nds, vector_len);
  } else if ((src_enc < 16) && (nds_enc < 16)) {
    // use nds as scratch for dst
    evmovdqul(nds, dst, Assembler::AVX_512bit);
    Assembler::vpsubw(nds, nds, src, vector_len);
    evmovdqul(dst, nds, Assembler::AVX_512bit);
  } else if (dst_enc < 16) {
    // use nds as scratch for xmm0 to hold src
    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, src, Assembler::AVX_512bit);
    Assembler::vpsubw(dst, dst, xmm0, vector_len);
    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
  } else {
    // worst case scenario, all regs are in the upper bank
    subptr(rsp, 64);
    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm1, src, Assembler::AVX_512bit);
    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
    Assembler::vpsubw(xmm0, xmm0, xmm1, vector_len);
    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
    addptr(rsp, 64);
  }
}

void MacroAssembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  int dst_enc = dst->encoding();
  int nds_enc = nds->encoding();
  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
    Assembler::vpsubw(dst, nds, src, vector_len);
  } else if (dst_enc < 16) {
    Assembler::vpsubw(dst, dst, src, vector_len);
  } else if (nds_enc < 16) {
    // dst is in the upper bank, use nds as scratch for it
    evmovdqul(nds, dst, Assembler::AVX_512bit);
    Assembler::vpsubw(nds, nds, src, vector_len);
    evmovdqul(dst, nds, Assembler::AVX_512bit);
  } else {
    // worst case scenario, all regs in upper bank
    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
    Assembler::vpsubw(xmm0, xmm0, src, vector_len);
    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
  }
}

void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  int dst_enc = dst->encoding();
  int nds_enc = nds->encoding();
  int src_enc = src->encoding();
  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
    Assembler::vpmullw(dst, nds, src, vector_len);
  } else if ((dst_enc < 16) && (src_enc < 16)) {
    Assembler::vpmullw(dst, dst, src, vector_len);
  } else if ((dst_enc < 16) && (nds_enc < 16)) {
    // use nds as scratch for src
    evmovdqul(nds, src, Assembler::AVX_512bit);
    Assembler::vpmullw(dst, dst, nds, vector_len);
  } else if ((src_enc < 16) && (nds_enc < 16)) {
    // use nds as scratch for dst
    evmovdqul(nds, dst, Assembler::AVX_512bit);
    Assembler::vpmullw(nds, nds, src, vector_len);
    evmovdqul(dst, nds, Assembler::AVX_512bit);
  } else if (dst_enc < 16) {
    // use nds as scratch for xmm0 to hold src
    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, src, Assembler::AVX_512bit);
    Assembler::vpmullw(dst, dst, xmm0, vector_len);
    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
  } else {
    // worst case scenario, all regs are in the upper bank
    subptr(rsp, 64);
    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm1, src, Assembler::AVX_512bit);
    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
    Assembler::vpmullw(xmm0, xmm0, xmm1, vector_len);
    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
    addptr(rsp, 64);
  }
}

void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  int dst_enc = dst->encoding();
  int nds_enc = nds->encoding();
  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
    Assembler::vpmullw(dst, nds, src, vector_len);
  } else if (dst_enc < 16) {
    Assembler::vpmullw(dst, dst, src, vector_len);
  } else if (nds_enc < 16) {
    // dst is in the upper bank, use nds as scratch for it
    evmovdqul(nds, dst, Assembler::AVX_512bit);
    Assembler::vpmullw(nds, nds, src, vector_len);
    evmovdqul(dst, nds, Assembler::AVX_512bit);
  } else {
    // worst case scenario, all regs in upper bank
    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
    Assembler::vpmullw(xmm0, xmm0, src, vector_len);
    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
  }
}
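
// Vector shift wrappers. The XMMRegister 'shift' form takes the shift count
// from the low 64 bits of a register and may need the same upper-bank
// staging as the arithmetic wrappers above; the int 'shift' form encodes the
// count as an immediate, so only dst (and possibly nds) ever needs staging.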
|
void MacroAssembler::vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
  int dst_enc = dst->encoding();
  int nds_enc = nds->encoding();
  int shift_enc = shift->encoding();
  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
    Assembler::vpsraw(dst, nds, shift, vector_len);
  } else if ((dst_enc < 16) && (shift_enc < 16)) {
    Assembler::vpsraw(dst, dst, shift, vector_len);
  } else if ((dst_enc < 16) && (nds_enc < 16)) {
    // use nds as scratch for shift
    evmovdqul(nds, shift, Assembler::AVX_512bit);
    Assembler::vpsraw(dst, dst, nds, vector_len);
  } else if ((shift_enc < 16) && (nds_enc < 16)) {
    // use nds as scratch for dst
    evmovdqul(nds, dst, Assembler::AVX_512bit);
    Assembler::vpsraw(nds, nds, shift, vector_len);
    evmovdqul(dst, nds, Assembler::AVX_512bit);
  } else if (dst_enc < 16) {
    // use nds to save a copy of xmm0 and hold shift
    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, shift, Assembler::AVX_512bit);
    Assembler::vpsraw(dst, dst, xmm0, vector_len);
    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
  } else if (nds_enc < 16) {
    // use nds and dst as temps
    evmovdqul(nds, dst, Assembler::AVX_512bit);
    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, shift, Assembler::AVX_512bit);
    Assembler::vpsraw(nds, nds, xmm0, vector_len);
    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
    evmovdqul(dst, nds, Assembler::AVX_512bit);
  } else {
    // worst case scenario, all regs are in the upper bank
    subptr(rsp, 64);
    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm1, shift, Assembler::AVX_512bit);
    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
    Assembler::vpsraw(xmm0, xmm0, xmm1, vector_len);
    evmovdqul(xmm1, dst, Assembler::AVX_512bit);
    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
    addptr(rsp, 64);
  }
}

void MacroAssembler::vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
  int dst_enc = dst->encoding();
  int nds_enc = nds->encoding();
  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
    Assembler::vpsraw(dst, nds, shift, vector_len);
  } else if (dst_enc < 16) {
    Assembler::vpsraw(dst, dst, shift, vector_len);
  } else if (nds_enc < 16) {
    // use nds as scratch
    evmovdqul(nds, dst, Assembler::AVX_512bit);
    Assembler::vpsraw(nds, nds, shift, vector_len);
    evmovdqul(dst, nds, Assembler::AVX_512bit);
  } else {
    // use nds as scratch for xmm0
    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
    Assembler::vpsraw(xmm0, xmm0, shift, vector_len);
    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
  }
}

void MacroAssembler::vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
  int dst_enc = dst->encoding();
  int nds_enc = nds->encoding();
  int shift_enc = shift->encoding();
  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
    Assembler::vpsrlw(dst, nds, shift, vector_len);
  } else if ((dst_enc < 16) && (shift_enc < 16)) {
    Assembler::vpsrlw(dst, dst, shift, vector_len);
  } else if ((dst_enc < 16) && (nds_enc < 16)) {
    // use nds as scratch for shift
    evmovdqul(nds, shift, Assembler::AVX_512bit);
    Assembler::vpsrlw(dst, dst, nds, vector_len);
  } else if ((shift_enc < 16) && (nds_enc < 16)) {
    // use nds as scratch for dst
    evmovdqul(nds, dst, Assembler::AVX_512bit);
    Assembler::vpsrlw(nds, nds, shift, vector_len);
    evmovdqul(dst, nds, Assembler::AVX_512bit);
  } else if (dst_enc < 16) {
    // use nds to save a copy of xmm0 and hold shift
    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, shift, Assembler::AVX_512bit);
    Assembler::vpsrlw(dst, dst, xmm0, vector_len);
    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
  } else if (nds_enc < 16) {
    // use nds and dst as temps
    evmovdqul(nds, dst, Assembler::AVX_512bit);
    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, shift, Assembler::AVX_512bit);
    Assembler::vpsrlw(nds, nds, xmm0, vector_len);
    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
    evmovdqul(dst, nds, Assembler::AVX_512bit);
  } else {
    // worst case scenario, all regs are in the upper bank
    subptr(rsp, 64);
    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm1, shift, Assembler::AVX_512bit);
    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
    Assembler::vpsrlw(xmm0, xmm0, xmm1, vector_len);
    evmovdqul(xmm1, dst, Assembler::AVX_512bit);
    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
    addptr(rsp, 64);
  }
}

void MacroAssembler::vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
  int dst_enc = dst->encoding();
  int nds_enc = nds->encoding();
  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
    Assembler::vpsrlw(dst, nds, shift, vector_len);
  } else if (dst_enc < 16) {
    Assembler::vpsrlw(dst, dst, shift, vector_len);
  } else if (nds_enc < 16) {
    // use nds as scratch
    evmovdqul(nds, dst, Assembler::AVX_512bit);
    Assembler::vpsrlw(nds, nds, shift, vector_len);
    evmovdqul(dst, nds, Assembler::AVX_512bit);
  } else {
    // use nds as scratch for xmm0
    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
    Assembler::vpsrlw(xmm0, xmm0, shift, vector_len);
    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
  }
}

void MacroAssembler::vpsllw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
  int dst_enc = dst->encoding();
  int nds_enc = nds->encoding();
  int shift_enc = shift->encoding();
  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
    Assembler::vpsllw(dst, nds, shift, vector_len);
  } else if ((dst_enc < 16) && (shift_enc < 16)) {
    Assembler::vpsllw(dst, dst, shift, vector_len);
  } else if ((dst_enc < 16) && (nds_enc < 16)) {
    // use nds as scratch for shift
    evmovdqul(nds, shift, Assembler::AVX_512bit);
    Assembler::vpsllw(dst, dst, nds, vector_len);
  } else if ((shift_enc < 16) && (nds_enc < 16)) {
    // use nds as scratch for dst
    evmovdqul(nds, dst, Assembler::AVX_512bit);
    Assembler::vpsllw(nds, nds, shift, vector_len);
    evmovdqul(dst, nds, Assembler::AVX_512bit);
  } else if (dst_enc < 16) {
    // use nds to save a copy of xmm0 and hold shift
    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, shift, Assembler::AVX_512bit);
    Assembler::vpsllw(dst, dst, xmm0, vector_len);
    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
  } else if (nds_enc < 16) {
    // use nds and dst as temps
    evmovdqul(nds, dst, Assembler::AVX_512bit);
    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, shift, Assembler::AVX_512bit);
    Assembler::vpsllw(nds, nds, xmm0, vector_len);
    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
    evmovdqul(dst, nds, Assembler::AVX_512bit);
  } else {
    // worst case scenario, all regs are in the upper bank
    subptr(rsp, 64);
    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm1, shift, Assembler::AVX_512bit);
    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
    Assembler::vpsllw(xmm0, xmm0, xmm1, vector_len);
    evmovdqul(xmm1, dst, Assembler::AVX_512bit);
    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
    addptr(rsp, 64);
  }
}

void MacroAssembler::vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
  int dst_enc = dst->encoding();
  int nds_enc = nds->encoding();
  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
    Assembler::vpsllw(dst, nds, shift, vector_len);
  } else if (dst_enc < 16) {
    Assembler::vpsllw(dst, dst, shift, vector_len);
  } else if (nds_enc < 16) {
    // use nds as scratch
    evmovdqul(nds, dst, Assembler::AVX_512bit);
    Assembler::vpsllw(nds, nds, shift, vector_len);
    evmovdqul(dst, nds, Assembler::AVX_512bit);
  } else {
    // use nds as scratch for xmm0
    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
    Assembler::vpsllw(xmm0, xmm0, shift, vector_len);
    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
  }
}
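
// The legacy SSE forms of punpcklbw and pshuflw below can name xmm0-xmm15
// only (their EVEX forms require AVX512BW), so on avx512nobw hardware any
// upper-bank operand has to travel through xmm0/xmm1, which are preserved in
// 64-byte (ZMM-sized) stack slots.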
|
// This instruction exists within macros, ergo we cannot control its input
// when emitted through those patterns.
void MacroAssembler::punpcklbw(XMMRegister dst, XMMRegister src) {
  if (VM_Version::supports_avx512nobw()) {
    int dst_enc = dst->encoding();
    int src_enc = src->encoding();
    if (dst_enc == src_enc) {
      if (dst_enc < 16) {
        Assembler::punpcklbw(dst, src);
      } else {
        subptr(rsp, 64);
        evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
        evmovdqul(xmm0, dst, Assembler::AVX_512bit);
        Assembler::punpcklbw(xmm0, xmm0);
        evmovdqul(dst, xmm0, Assembler::AVX_512bit);
        evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
        addptr(rsp, 64);
      }
    } else {
      if ((src_enc < 16) && (dst_enc < 16)) {
        Assembler::punpcklbw(dst, src);
      } else if (src_enc < 16) {
        subptr(rsp, 64);
        evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
        evmovdqul(xmm0, dst, Assembler::AVX_512bit);
        Assembler::punpcklbw(xmm0, src);
        evmovdqul(dst, xmm0, Assembler::AVX_512bit);
        evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
        addptr(rsp, 64);
      } else if (dst_enc < 16) {
        subptr(rsp, 64);
        evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
        evmovdqul(xmm0, src, Assembler::AVX_512bit);
        Assembler::punpcklbw(dst, xmm0);
        evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
        addptr(rsp, 64);
      } else {
        subptr(rsp, 64);
        evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
        subptr(rsp, 64);
        evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
        evmovdqul(xmm0, dst, Assembler::AVX_512bit);
        evmovdqul(xmm1, src, Assembler::AVX_512bit);
        Assembler::punpcklbw(xmm0, xmm1);
        evmovdqul(dst, xmm0, Assembler::AVX_512bit);
        evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
        addptr(rsp, 64);
        evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
        addptr(rsp, 64);
      }
    }
  } else {
    Assembler::punpcklbw(dst, src);
  }
}

// This instruction exists within macros, ergo we cannot control its input
// when emitted through those patterns.
void MacroAssembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
  if (VM_Version::supports_avx512nobw()) {
    int dst_enc = dst->encoding();
    int src_enc = src->encoding();
    if (dst_enc == src_enc) {
      if (dst_enc < 16) {
        Assembler::pshuflw(dst, src, mode);
      } else {
        subptr(rsp, 64);
        evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
        evmovdqul(xmm0, dst, Assembler::AVX_512bit);
        Assembler::pshuflw(xmm0, xmm0, mode);
        evmovdqul(dst, xmm0, Assembler::AVX_512bit);
        evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
        addptr(rsp, 64);
      }
    } else {
      if ((src_enc < 16) && (dst_enc < 16)) {
        Assembler::pshuflw(dst, src, mode);
      } else if (src_enc < 16) {
        subptr(rsp, 64);
        evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
        evmovdqul(xmm0, dst, Assembler::AVX_512bit);
        Assembler::pshuflw(xmm0, src, mode);
        evmovdqul(dst, xmm0, Assembler::AVX_512bit);
        evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
        addptr(rsp, 64);
      } else if (dst_enc < 16) {
        subptr(rsp, 64);
        evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
        evmovdqul(xmm0, src, Assembler::AVX_512bit);
        Assembler::pshuflw(dst, xmm0, mode);
        evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
        addptr(rsp, 64);
      } else {
        subptr(rsp, 64);
        evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
        subptr(rsp, 64);
        evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
        evmovdqul(xmm0, dst, Assembler::AVX_512bit);
        evmovdqul(xmm1, src, Assembler::AVX_512bit);
        Assembler::pshuflw(xmm0, xmm1, mode);
        evmovdqul(dst, xmm0, Assembler::AVX_512bit);
        evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
        addptr(rsp, 64);
        evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
        addptr(rsp, 64);
      }
    }
  } else {
    Assembler::pshuflw(dst, src, mode);
  }
}

void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
  if (reachable(src)) {