--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp Mon Nov 16 14:19:10 2015 +0100
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp Thu Nov 19 16:07:22 2015 -0800
@@ -2152,6 +2152,23 @@
emit_int8(0xC0 | encode);
}
+void Assembler::kmovwl(KRegister dst, Register src) {
+ NOT_LP64(assert(VM_Version::supports_evex(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+ emit_int8((unsigned char)0x92);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::kmovdl(KRegister dst, Register src) {
+ NOT_LP64(assert(VM_Version::supports_evex(), ""));
+ VexSimdPrefix pre = !_legacy_mode_bw ? VEX_SIMD_F2 : VEX_SIMD_NONE;
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, VEX_OPCODE_0F, &attributes);
+ emit_int8((unsigned char)0x92);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
void Assembler::kmovql(KRegister dst, KRegister src) {
NOT_LP64(assert(VM_Version::supports_evex(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
@@ -2187,20 +2204,39 @@
emit_int8((unsigned char)(0xC0 | encode));
}
-void Assembler::kmovdl(KRegister dst, Register src) {
- NOT_LP64(assert(VM_Version::supports_evex(), ""));
- VexSimdPrefix pre = !_legacy_mode_bw ? VEX_SIMD_F2 : VEX_SIMD_NONE;
+// This instruction sets ZF and CF: ZF=1 if the OR of the operands is all zeros, CF=1 if it is all ones
+void Assembler::kortestbl(KRegister src1, KRegister src2) {
+ NOT_LP64(assert(VM_Version::supports_avx512dq(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
- int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, VEX_OPCODE_0F, &attributes);
- emit_int8((unsigned char)0x92);
- emit_int8((unsigned char)(0xC0 | encode));
-}
-
-void Assembler::kmovwl(KRegister dst, Register src) {
+ int encode = kreg_prefix_and_encode(src1, knoreg, src2, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int8((unsigned char)0x98);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// This instruction sets ZF and CF: ZF=1 if the OR of the operands is all zeros, CF=1 if it is all ones
+void Assembler::kortestwl(KRegister src1, KRegister src2) {
NOT_LP64(assert(VM_Version::supports_evex(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
- int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
- emit_int8((unsigned char)0x92);
+ int encode = kreg_prefix_and_encode(src1, knoreg, src2, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+ emit_int8((unsigned char)0x98);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// This instruction sets ZF and CF: ZF=1 if the OR of the operands is all zeros, CF=1 if it is all ones
+void Assembler::kortestdl(KRegister src1, KRegister src2) {
+ NOT_LP64(assert(VM_Version::supports_avx512bw(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = kreg_prefix_and_encode(src1, knoreg, src2, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int8((unsigned char)0x98);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// This instruction sets ZF and CF: ZF=1 if the OR of the operands is all zeros, CF=1 if it is all ones
+void Assembler::kortestql(KRegister src1, KRegister src2) {
+ NOT_LP64(assert(VM_Version::supports_avx512bw(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = kreg_prefix_and_encode(src1, knoreg, src2, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+ emit_int8((unsigned char)0x98);
emit_int8((unsigned char)(0xC0 | encode));
}
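Usage sketch (illustrative, not part of this change): the ZF/CF produced by the new kortest forms feed an ordinary jcc in generated code. Register and label names below are assumptions, for a MacroAssembler caller that has already produced a comparison mask in k1.

    kortestql(k1, k1);                     // ZF=1 if the OR of the masks is all zeros, CF=1 if all ones
    jcc(Assembler::carryClear, MISMATCH);  // CF==0 -> at least one element did not compare equal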
@@ -2337,6 +2373,63 @@
}
// Move Unaligned EVEX enabled Vector (programmable : 8,16,32,64)
+void Assembler::evmovdqub(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+ emit_int8(0x6F);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::evmovdqub(XMMRegister dst, Address src, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+ vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+ emit_int8(0x6F);
+ emit_operand(dst, src);
+}
+
+void Assembler::evmovdqub(Address dst, XMMRegister src, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ assert(src != xnoreg, "sanity");
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+ vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+ emit_int8(0x7F);
+ emit_operand(src, dst);
+}
+
+void Assembler::evmovdquw(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+ emit_int8(0x6F);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::evmovdquw(XMMRegister dst, Address src, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+ vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+ emit_int8(0x6F);
+ emit_operand(dst, src);
+}
+
+void Assembler::evmovdquw(Address dst, XMMRegister src, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ assert(src != xnoreg, "sanity");
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+ vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+ emit_int8(0x7F);
+ emit_operand(src, dst);
+}
void Assembler::evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
@@ -3033,6 +3126,36 @@
emit_int8(imm8);
}
+// In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
+void Assembler::pcmpeqb(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int8(0x74);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
+void Assembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(VM_Version::supports_avx(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+ int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+ int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int8(0x74);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// In this context, kdst is written with the mask that marks the equal components
+void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(VM_Version::supports_avx512bw(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+ int encode = vex_prefix_and_encode(kdst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int8(0x74);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
void Assembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
@@ -3041,9 +3164,9 @@
emit_int8((unsigned char)(0xC0 | encode));
}
+// In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
void Assembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx(), "");
- assert(!VM_Version::supports_evex(), "");
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
@@ -3051,6 +3174,87 @@
emit_int8((unsigned char)(0xC0 | encode));
}
+// In this context, kdst is written with the mask that marks the equal components
+void Assembler::evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(VM_Version::supports_avx512bw(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+ int encode = vex_prefix_and_encode(kdst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int8(0x75);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
+void Assembler::pcmpeqd(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int8(0x76);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
+void Assembler::vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(VM_Version::supports_avx(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+ int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+ int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int8(0x76);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// In this context, kdst is written with the mask that marks the equal components
+void Assembler::evpcmpeqd(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+ int encode = vex_prefix_and_encode(kdst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int8(0x76);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
+void Assembler::pcmpeqq(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse4_1(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int8(0x29);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
+void Assembler::vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(VM_Version::supports_avx(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+ int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+ int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int8(0x29);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// In this context, kdst is written with the mask that marks the equal components
+void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+ int encode = vex_prefix_and_encode(kdst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int8(0x29);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// In this context, kdst is written with the mask that marks the equal components
+void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+ int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+ int dst_enc = kdst->encoding();
+ vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int8(0x29);
+ emit_operand(as_Register(dst_enc), src);
+}
+
void Assembler::pmovmskb(Register dst, XMMRegister src) {
assert(VM_Version::supports_sse2(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
@@ -3139,11 +3343,11 @@
emit_int8((unsigned char)(0xC0 | encode));
}
-void Assembler::vpmovzxbw(XMMRegister dst, Address src) {
+void Assembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
assert(VM_Version::supports_avx(), "");
InstructionMark im(this);
assert(dst != xnoreg, "sanity");
- InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x30);
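Illustrative sketch (not part of this change) of how the new EVEX pieces above compose into a 64-byte equality check, assuming AVX-512BW and a MacroAssembler caller; register, address, and label choices are assumptions:

    Label NOT_EQUAL;
    evmovdqub(xmm1, Address(rsi, 0), Assembler::AVX_512bit);  // load 64 bytes of each operand
    evmovdqub(xmm2, Address(rdi, 0), Assembler::AVX_512bit);
    evpcmpeqb(k7, xmm1, xmm2, Assembler::AVX_512bit);         // k7 gets one bit per equal byte
    kortestql(k7, k7);                                        // CF=1 only when all 64 bytes matched
    jcc(Assembler::carryClear, NOT_EQUAL);                    // CF==0 -> some byte differed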
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Mon Nov 16 14:19:10 2015 +0100
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Thu Nov 19 16:07:22 2015 -0800
@@ -3949,6 +3949,236 @@
testl(dst, as_Address(src));
}
+void MacroAssembler::pcmpeqb(XMMRegister dst, XMMRegister src) {
+ int dst_enc = dst->encoding();
+ int src_enc = src->encoding();
+ if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+ Assembler::pcmpeqb(dst, src);
+ } else if ((dst_enc < 16) && (src_enc < 16)) {
+ Assembler::pcmpeqb(dst, src);
+ } else if (src_enc < 16) {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+ Assembler::pcmpeqb(xmm0, src);
+ movdqu(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ } else if (dst_enc < 16) {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, src, Assembler::AVX_512bit);
+ Assembler::pcmpeqb(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ } else {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+ movdqu(xmm0, src);
+ movdqu(xmm1, dst);
+ Assembler::pcmpeqb(xmm1, xmm0);
+ movdqu(dst, xmm1);
+ evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+}
+
+void MacroAssembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
+ int dst_enc = dst->encoding();
+ int src_enc = src->encoding();
+ if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+ Assembler::pcmpeqw(dst, src);
+ } else if ((dst_enc < 16) && (src_enc < 16)) {
+ Assembler::pcmpeqw(dst, src);
+ } else if (src_enc < 16) {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+ Assembler::pcmpeqw(xmm0, src);
+ movdqu(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ } else if (dst_enc < 16) {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, src, Assembler::AVX_512bit);
+ Assembler::pcmpeqw(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ } else {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+ movdqu(xmm0, src);
+ movdqu(xmm1, dst);
+ Assembler::pcmpeqw(xmm1, xmm0);
+ movdqu(dst, xmm1);
+ evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+}
+
+void MacroAssembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
+ int dst_enc = dst->encoding();
+ if (dst_enc < 16) {
+ Assembler::pcmpestri(dst, src, imm8);
+ } else {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+ Assembler::pcmpestri(xmm0, src, imm8);
+ movdqu(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+}
+
+void MacroAssembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
+ int dst_enc = dst->encoding();
+ int src_enc = src->encoding();
+ if ((dst_enc < 16) && (src_enc < 16)) {
+ Assembler::pcmpestri(dst, src, imm8);
+ } else if (src_enc < 16) {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+ Assembler::pcmpestri(xmm0, src, imm8);
+ movdqu(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ } else if (dst_enc < 16) {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, src, Assembler::AVX_512bit);
+ Assembler::pcmpestri(dst, xmm0, imm8);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ } else {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+ movdqu(xmm0, src);
+ movdqu(xmm1, dst);
+ Assembler::pcmpestri(xmm1, xmm0, imm8);
+ movdqu(dst, xmm1);
+ evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+}
+
+void MacroAssembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
+ int dst_enc = dst->encoding();
+ int src_enc = src->encoding();
+ if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+ Assembler::pmovzxbw(dst, src);
+ } else if ((dst_enc < 16) && (src_enc < 16)) {
+ Assembler::pmovzxbw(dst, src);
+ } else if (src_enc < 16) {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+ Assembler::pmovzxbw(xmm0, src);
+ movdqu(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ } else if (dst_enc < 16) {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, src, Assembler::AVX_512bit);
+ Assembler::pmovzxbw(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ } else {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+ movdqu(xmm0, src);
+ movdqu(xmm1, dst);
+ Assembler::pmovzxbw(xmm1, xmm0);
+ movdqu(dst, xmm1);
+ evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+}
+
+void MacroAssembler::pmovzxbw(XMMRegister dst, Address src) {
+ int dst_enc = dst->encoding();
+ if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+ Assembler::pmovzxbw(dst, src);
+ } else if (dst_enc < 16) {
+ Assembler::pmovzxbw(dst, src);
+ } else {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+ Assembler::pmovzxbw(xmm0, src);
+ movdqu(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+}
+
+void MacroAssembler::pmovmskb(Register dst, XMMRegister src) {
+ int src_enc = src->encoding();
+ if (src_enc < 16) {
+ Assembler::pmovmskb(dst, src);
+ } else {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, src, Assembler::AVX_512bit);
+ Assembler::pmovmskb(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+}
+
+void MacroAssembler::ptest(XMMRegister dst, XMMRegister src) {
+ int dst_enc = dst->encoding();
+ int src_enc = src->encoding();
+ if ((dst_enc < 16) && (src_enc < 16)) {
+ Assembler::ptest(dst, src);
+ } else if (src_enc < 16) {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+ Assembler::ptest(xmm0, src);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ } else if (dst_enc < 16) {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, src, Assembler::AVX_512bit);
+ Assembler::ptest(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ } else {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+ movdqu(xmm0, src);
+ movdqu(xmm1, dst);
+ Assembler::ptest(xmm1, xmm0);
+ evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+}
+
void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) {
if (reachable(src)) {
Assembler::sqrtsd(dst, as_Address(src));
@@ -4256,6 +4486,214 @@
}
}
+void MacroAssembler::vpbroadcastw(XMMRegister dst, XMMRegister src) {
+ int dst_enc = dst->encoding();
+ int src_enc = src->encoding();
+ if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+ Assembler::vpbroadcastw(dst, src);
+ } else if ((dst_enc < 16) && (src_enc < 16)) {
+ Assembler::vpbroadcastw(dst, src);
+ } else if (src_enc < 16) {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+ Assembler::vpbroadcastw(xmm0, src);
+ movdqu(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ } else if (dst_enc < 16) {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, src, Assembler::AVX_512bit);
+ Assembler::vpbroadcastw(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ } else {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+ movdqu(xmm0, src);
+ movdqu(xmm1, dst);
+ Assembler::vpbroadcastw(xmm1, xmm0);
+ movdqu(dst, xmm1);
+ evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+}
+
+void MacroAssembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ int dst_enc = dst->encoding();
+ int nds_enc = nds->encoding();
+ int src_enc = src->encoding();
+ assert(dst_enc == nds_enc, "");
+ if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+ Assembler::vpcmpeqb(dst, nds, src, vector_len);
+ } else if ((dst_enc < 16) && (src_enc < 16)) {
+ Assembler::vpcmpeqb(dst, nds, src, vector_len);
+ } else if (src_enc < 16) {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+ Assembler::vpcmpeqb(xmm0, xmm0, src, vector_len);
+ movdqu(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ } else if (dst_enc < 16) {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, src, Assembler::AVX_512bit);
+ Assembler::vpcmpeqb(dst, dst, xmm0, vector_len);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ } else {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+ movdqu(xmm0, src);
+ movdqu(xmm1, dst);
+ Assembler::vpcmpeqb(xmm1, xmm1, xmm0, vector_len);
+ movdqu(dst, xmm1);
+ evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+}
+
+void MacroAssembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ int dst_enc = dst->encoding();
+ int nds_enc = nds->encoding();
+ int src_enc = src->encoding();
+ assert(dst_enc == nds_enc, "");
+ if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+ Assembler::vpcmpeqw(dst, nds, src, vector_len);
+ } else if ((dst_enc < 16) && (src_enc < 16)) {
+ Assembler::vpcmpeqw(dst, nds, src, vector_len);
+ } else if (src_enc < 16) {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+ Assembler::vpcmpeqw(xmm0, xmm0, src, vector_len);
+ movdqu(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ } else if (dst_enc < 16) {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, src, Assembler::AVX_512bit);
+ Assembler::vpcmpeqw(dst, dst, xmm0, vector_len);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ } else {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+ movdqu(xmm0, src);
+ movdqu(xmm1, dst);
+ Assembler::vpcmpeqw(xmm1, xmm1, xmm0, vector_len);
+ movdqu(dst, xmm1);
+ evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+}
+
+void MacroAssembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
+ int dst_enc = dst->encoding();
+ if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+ Assembler::vpmovzxbw(dst, src, vector_len);
+ } else if (dst_enc < 16) {
+ Assembler::vpmovzxbw(dst, src, vector_len);
+ } else {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+ Assembler::vpmovzxbw(xmm0, src, vector_len);
+ movdqu(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+}
+
+void MacroAssembler::vpmovmskb(Register dst, XMMRegister src) {
+ int src_enc = src->encoding();
+ if (src_enc < 16) {
+ Assembler::vpmovmskb(dst, src);
+ } else {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, src, Assembler::AVX_512bit);
+ Assembler::vpmovmskb(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+}
+
+void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ int dst_enc = dst->encoding();
+ int nds_enc = nds->encoding();
+ int src_enc = src->encoding();
+ if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+ Assembler::vpmullw(dst, nds, src, vector_len);
+ } else if ((dst_enc < 16) && (src_enc < 16)) {
+ Assembler::vpmullw(dst, dst, src, vector_len);
+ } else if ((dst_enc < 16) && (nds_enc < 16)) {
+ // use nds as scratch for src
+ evmovdqul(nds, src, Assembler::AVX_512bit);
+ Assembler::vpmullw(dst, dst, nds, vector_len);
+ } else if ((src_enc < 16) && (nds_enc < 16)) {
+ // use nds as scratch for dst
+ evmovdqul(nds, dst, Assembler::AVX_512bit);
+ Assembler::vpmullw(nds, nds, src, vector_len);
+ evmovdqul(dst, nds, Assembler::AVX_512bit);
+ } else if (dst_enc < 16) {
+    // use nds as scratch for xmm0 so xmm0 can hold src
+ evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, src, Assembler::AVX_512bit);
+ Assembler::vpmullw(dst, dst, xmm0, vector_len);
+ evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+ } else {
+    // worst case scenario, all regs are in the upper bank
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+ evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm1, src, Assembler::AVX_512bit);
+ evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+ Assembler::vpmullw(xmm0, xmm0, xmm1, vector_len);
+ evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+ evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+}
+
+void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+ int dst_enc = dst->encoding();
+ int nds_enc = nds->encoding();
+ if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+ Assembler::vpmullw(dst, nds, src, vector_len);
+ } else if (dst_enc < 16) {
+ Assembler::vpmullw(dst, dst, src, vector_len);
+ } else if (nds_enc < 16) {
+    // dst_enc is in the upper bank, so use nds as scratch for dst
+ evmovdqul(nds, dst, Assembler::AVX_512bit);
+ Assembler::vpmullw(nds, nds, src, vector_len);
+ evmovdqul(dst, nds, Assembler::AVX_512bit);
+ } else {
+    // worst case scenario, all regs in upper bank
+ evmovdqul(nds, xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+ Assembler::vpmullw(xmm0, xmm0, src, vector_len);
+ evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+ }
+}
+
void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
int dst_enc = dst->encoding();
int nds_enc = nds->encoding();
@@ -4374,66 +4812,6 @@
}
}
-
-void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
- int dst_enc = dst->encoding();
- int nds_enc = nds->encoding();
- int src_enc = src->encoding();
- if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
- Assembler::vpmullw(dst, nds, src, vector_len);
- } else if ((dst_enc < 16) && (src_enc < 16)) {
- Assembler::vpmullw(dst, dst, src, vector_len);
- } else if ((dst_enc < 16) && (nds_enc < 16)) {
- // use nds as scratch for src
- evmovdqul(nds, src, Assembler::AVX_512bit);
- Assembler::vpmullw(dst, dst, nds, vector_len);
- } else if ((src_enc < 16) && (nds_enc < 16)) {
- // use nds as scratch for dst
- evmovdqul(nds, dst, Assembler::AVX_512bit);
- Assembler::vpmullw(nds, nds, src, vector_len);
- evmovdqul(dst, nds, Assembler::AVX_512bit);
- } else if (dst_enc < 16) {
- // use nds as scatch for xmm0 to hold src
- evmovdqul(nds, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, src, Assembler::AVX_512bit);
- Assembler::vpmullw(dst, dst, xmm0, vector_len);
- evmovdqul(xmm0, nds, Assembler::AVX_512bit);
- } else {
- // worse case scenario, all regs are in the upper bank
- subptr(rsp, 64);
- evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
- evmovdqul(nds, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm1, src, Assembler::AVX_512bit);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::vpmullw(xmm0, xmm0, xmm1, vector_len);
- evmovdqul(dst, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, nds, Assembler::AVX_512bit);
- evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
- addptr(rsp, 64);
- }
-}
-
-void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
- int dst_enc = dst->encoding();
- int nds_enc = nds->encoding();
- if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
- Assembler::vpmullw(dst, nds, src, vector_len);
- } else if (dst_enc < 16) {
- Assembler::vpmullw(dst, dst, src, vector_len);
- } else if (nds_enc < 16) {
- // implies dst_enc in upper bank with src as scratch
- evmovdqul(nds, dst, Assembler::AVX_512bit);
- Assembler::vpmullw(nds, nds, src, vector_len);
- evmovdqul(dst, nds, Assembler::AVX_512bit);
- } else {
- // worse case scenario, all regs in upper bank
- evmovdqul(nds, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::vpmullw(xmm0, xmm0, src, vector_len);
- evmovdqul(xmm0, nds, Assembler::AVX_512bit);
- }
-}
-
void MacroAssembler::vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
int dst_enc = dst->encoding();
int nds_enc = nds->encoding();
@@ -4638,6 +5016,40 @@
}
}
+void MacroAssembler::vptest(XMMRegister dst, XMMRegister src) {
+ int dst_enc = dst->encoding();
+ int src_enc = src->encoding();
+ if ((dst_enc < 16) && (src_enc < 16)) {
+ Assembler::vptest(dst, src);
+ } else if (src_enc < 16) {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+ Assembler::vptest(xmm0, src);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ } else if (dst_enc < 16) {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, src, Assembler::AVX_512bit);
+ Assembler::vptest(dst, xmm0);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ } else {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+ movdqu(xmm0, src);
+ movdqu(xmm1, dst);
+ Assembler::vptest(xmm1, xmm0);
+ evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+}
+
// This instruction exists within macros, ergo we cannot control its input
// when emitted through those patterns.
void MacroAssembler::punpcklbw(XMMRegister dst, XMMRegister src) {
@@ -7722,7 +8134,7 @@
vmovdqu(vec1, Address(str1, result, scale));
vpxor(vec1, Address(str2, result, scale));
} else {
- vpmovzxbw(vec1, Address(str1, result, scale1));
+ vpmovzxbw(vec1, Address(str1, result, scale1), Assembler::AVX_256bit);
vpxor(vec1, Address(str2, result, scale2));
}
vptest(vec1, vec1);
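Caller-side sketch (illustrative, not part of this change) of what the MacroAssembler wrappers above provide: generated code can hand upper-bank registers (xmm16 and above) to legacy-encoded opcodes, and the wrapper either emits directly or takes the xmm0/xmm1 spill path shown in the bodies above, depending on CPU features. Register choices here are assumptions.

    pcmpeqb(xmm17, xmm18);  // MacroAssembler wrapper; spills through xmm0/xmm1 only when needed
    pmovmskb(rax, xmm17);   // likewise routed through xmm0 when src lives in the upper bank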