--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp Mon Dec 14 15:53:48 2015 +0000
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp Tue Dec 15 13:42:13 2015 +0000
@@ -2152,33 +2152,64 @@
emit_int8(0xC0 | encode);
}
-void Assembler::kmovwl(KRegister dst, Register src) {
- NOT_LP64(assert(VM_Version::supports_evex(), ""));
+void Assembler::kmovbl(KRegister dst, Register src) {
+ assert(VM_Version::supports_avx512dq(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int8((unsigned char)0x92);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::kmovbl(Register dst, KRegister src) {
+ assert(VM_Version::supports_avx512dq(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
- int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int8((unsigned char)0x93);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::kmovwl(KRegister dst, Register src) {
+ assert(VM_Version::supports_evex(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0x92);
emit_int8((unsigned char)(0xC0 | encode));
}
+void Assembler::kmovwl(Register dst, KRegister src) {
+ assert(VM_Version::supports_evex(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+ emit_int8((unsigned char)0x93);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
void Assembler::kmovdl(KRegister dst, Register src) {
- NOT_LP64(assert(VM_Version::supports_evex(), ""));
- VexSimdPrefix pre = !_legacy_mode_bw ? VEX_SIMD_F2 : VEX_SIMD_NONE;
+ assert(VM_Version::supports_avx512bw(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
- int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, VEX_OPCODE_0F, &attributes);
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0x92);
emit_int8((unsigned char)(0xC0 | encode));
}
+void Assembler::kmovdl(Register dst, KRegister src) {
+ assert(VM_Version::supports_avx512bw(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+ emit_int8((unsigned char)0x93);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
void Assembler::kmovql(KRegister dst, KRegister src) {
- NOT_LP64(assert(VM_Version::supports_evex(), ""));
+ assert(VM_Version::supports_avx512bw(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
- int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0x90);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::kmovql(KRegister dst, Address src) {
- NOT_LP64(assert(VM_Version::supports_evex(), ""));
+ assert(VM_Version::supports_avx512bw(), "");
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
@@ -2187,7 +2218,7 @@
}
void Assembler::kmovql(Address dst, KRegister src) {
- NOT_LP64(assert(VM_Version::supports_evex(), ""));
+ assert(VM_Version::supports_avx512bw(), "");
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
@@ -2196,46 +2227,53 @@
}
void Assembler::kmovql(KRegister dst, Register src) {
- NOT_LP64(assert(VM_Version::supports_evex(), ""));
- VexSimdPrefix pre = !_legacy_mode_bw ? VEX_SIMD_F2 : VEX_SIMD_NONE;
- InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_bw, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
- int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, VEX_OPCODE_0F, &attributes);
+ assert(VM_Version::supports_avx512bw(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0x92);
emit_int8((unsigned char)(0xC0 | encode));
}
+void Assembler::kmovql(Register dst, KRegister src) {
+ assert(VM_Version::supports_avx512bw(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+ emit_int8((unsigned char)0x93);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
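Every new kmov* GPR form above follows the same pattern: a VEX-encoded 0F-map opcode (0x92 moves a general register into a mask register, 0x93 moves a mask register back into a general register, 0x90 is the mask-to-mask form), with the SIMD prefix and W bit selecting the width (none = word, 66 = byte, F2 = doubleword, F2 with W1 = quadword), followed by a register-direct ModRM byte. A minimal sketch of that trailing byte, the value produced by emit_int8(0xC0 | encode) (illustrative only, not part of this changeset):

// Illustrative only, not part of the changeset: the register-direct ModRM byte
// (mod = 11b) appended after the kmov*/kortest* opcodes. The 'encode' value
// returned by vex_prefix_and_encode() packs the same two 3-bit register fields.
static inline unsigned char modrm_reg_direct(int reg_enc, int rm_enc) {
  return (unsigned char)(0xC0 | ((reg_enc & 7) << 3) | (rm_enc & 7));
}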
// This instruction produces ZF or CF flags
void Assembler::kortestbl(KRegister src1, KRegister src2) {
- NOT_LP64(assert(VM_Version::supports_avx512dq(), ""));
+ assert(VM_Version::supports_avx512dq(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
- int encode = kreg_prefix_and_encode(src1, knoreg, src2, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0x98);
emit_int8((unsigned char)(0xC0 | encode));
}
// This instruction produces ZF or CF flags
void Assembler::kortestwl(KRegister src1, KRegister src2) {
- NOT_LP64(assert(VM_Version::supports_evex(), ""));
+ assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
- int encode = kreg_prefix_and_encode(src1, knoreg, src2, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+ int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0x98);
emit_int8((unsigned char)(0xC0 | encode));
}
// This instruction produces ZF or CF flags
void Assembler::kortestdl(KRegister src1, KRegister src2) {
- NOT_LP64(assert(VM_Version::supports_avx512bw(), ""));
+ assert(VM_Version::supports_avx512bw(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
- int encode = kreg_prefix_and_encode(src1, knoreg, src2, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0x98);
emit_int8((unsigned char)(0xC0 | encode));
}
// This instruction produces ZF or CF flags
void Assembler::kortestql(KRegister src1, KRegister src2) {
- NOT_LP64(assert(VM_Version::supports_avx512bw(), ""));
+ assert(VM_Version::supports_avx512bw(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
- int encode = kreg_prefix_and_encode(src1, knoreg, src2, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+ int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0x98);
emit_int8((unsigned char)(0xC0 | encode));
}
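All four kortest variants OR the two source masks and report the result only through flags: ZF is set when the OR is zero, CF when it is all ones for the operand width. The string_compare changes later in this patch rely on exactly that, branching on carry-clear (Assembler::aboveEqual) to detect a 64-byte block that is not entirely equal. A scalar model of the quadword form (illustrative only; the struct and helper names are invented for the sketch):

#include <cstdint>

// Illustrative model of kortestql(k1, k2): ZF = ((k1 | k2) == 0),
// CF = ((k1 | k2) == all ones). No destination register is written.
struct KortestFlags { bool zf; bool cf; };

static KortestFlags kortestql_model(uint64_t k1, uint64_t k2) {
  uint64_t m = k1 | k2;
  KortestFlags f;
  f.zf = (m == 0);
  f.cf = (m == ~(uint64_t)0);
  return f;
}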
@@ -2375,7 +2413,7 @@
// Move Unaligned EVEX enabled Vector (programmable : 8,16,32,64)
void Assembler::evmovdqub(XMMRegister dst, XMMRegister src, int vector_len) {
assert(VM_Version::supports_evex(), "");
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x6F);
emit_int8((unsigned char)(0xC0 | encode));
@@ -2395,7 +2433,7 @@
assert(VM_Version::supports_evex(), "");
assert(src != xnoreg, "sanity");
InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x7F);
@@ -2404,7 +2442,7 @@
void Assembler::evmovdquw(XMMRegister dst, XMMRegister src, int vector_len) {
assert(VM_Version::supports_evex(), "");
- InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x6F);
emit_int8((unsigned char)(0xC0 | encode));
@@ -2424,7 +2462,7 @@
assert(VM_Version::supports_evex(), "");
assert(src != xnoreg, "sanity");
InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x7F);
@@ -3069,7 +3107,7 @@
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
InstructionMark im(this);
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x67);
@@ -3078,7 +3116,7 @@
void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x67);
emit_int8((unsigned char)(0xC0 | encode));
@@ -3086,7 +3124,7 @@
void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(UseAVX > 0, "some form of AVX must be enabled");
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x67);
@@ -3128,7 +3166,7 @@
// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
void Assembler::pcmpeqb(XMMRegister dst, XMMRegister src) {
- NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ assert(VM_Version::supports_sse2(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x74);
@@ -3148,16 +3186,28 @@
// In this context, kdst is written the mask used to process the equal components
void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx512bw(), "");
- InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
int encode = vex_prefix_and_encode(kdst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x74);
emit_int8((unsigned char)(0xC0 | encode));
}
+void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
+ assert(VM_Version::supports_avx512bw(), "");
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+ int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+ int dst_enc = kdst->encoding();
+ vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int8(0x74);
+ emit_operand(as_Register(dst_enc), src);
+}
+
// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
void Assembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
- NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ assert(VM_Version::supports_sse2(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x75);
@@ -3177,16 +3227,28 @@
// In this context, kdst is written the mask used to process the equal components
void Assembler::evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx512bw(), "");
- InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
int encode = vex_prefix_and_encode(kdst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x75);
emit_int8((unsigned char)(0xC0 | encode));
}
+void Assembler::evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
+ assert(VM_Version::supports_avx512bw(), "");
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+ int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+ int dst_enc = kdst->encoding();
+ vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int8(0x75);
+ emit_operand(as_Register(dst_enc), src);
+}
+
// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
void Assembler::pcmpeqd(XMMRegister dst, XMMRegister src) {
- NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ assert(VM_Version::supports_sse2(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x76);
@@ -3213,9 +3275,21 @@
emit_int8((unsigned char)(0xC0 | encode));
}
+void Assembler::evpcmpeqd(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+ int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+ int dst_enc = kdst->encoding();
+ vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int8(0x76);
+ emit_operand(as_Register(dst_enc), src);
+}
+
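The three new evpcmpeq{b,w,d} memory forms mirror the register forms but read the second operand from memory (EVEX_FVM and EVEX_FV full-vector tuples) and write one result bit per compared element into kdst instead of producing a vector result. As a scalar model of the byte variant at 512-bit width (illustrative only; the helper name is invented for the sketch):

#include <cstdint>

// Illustrative scalar model of evpcmpeqb(k, zmm, m512, AVX_512bit):
// bit i of the returned mask is 1 iff byte i of 'a' equals byte i of 'mem'.
static uint64_t evpcmpeqb_model(const uint8_t a[64], const uint8_t* mem) {
  uint64_t k = 0;
  for (int i = 0; i < 64; i++) {
    if (a[i] == mem[i]) {
      k |= (uint64_t)1 << i;
    }
  }
  return k;
}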
// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
void Assembler::pcmpeqq(XMMRegister dst, XMMRegister src) {
- NOT_LP64(assert(VM_Version::supports_sse4_1(), ""));
+ assert(VM_Version::supports_sse4_1(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x29);
@@ -3328,7 +3402,7 @@
void Assembler::pmovzxbw(XMMRegister dst, Address src) {
assert(VM_Version::supports_sse4_1(), "");
InstructionMark im(this);
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x30);
@@ -3337,7 +3411,7 @@
void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x30);
emit_int8((unsigned char)(0xC0 | encode));
@@ -3347,7 +3421,7 @@
assert(VM_Version::supports_avx(), "");
InstructionMark im(this);
assert(dst != xnoreg, "sanity");
- InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x30);
@@ -3452,7 +3526,7 @@
void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_ssse3(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x00);
emit_int8((unsigned char)(0xC0 | encode));
@@ -3461,7 +3535,7 @@
void Assembler::pshufb(XMMRegister dst, Address src) {
assert(VM_Version::supports_ssse3(), "");
InstructionMark im(this);
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x00);
@@ -3495,7 +3569,7 @@
void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
assert(isByte(mode), "invalid value");
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x70);
emit_int8((unsigned char)(0xC0 | encode));
@@ -3507,7 +3581,7 @@
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
InstructionMark im(this);
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x70);
@@ -4723,7 +4797,7 @@
void Assembler::paddb(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xFC);
emit_int8((unsigned char)(0xC0 | encode));
@@ -4731,7 +4805,7 @@
void Assembler::paddw(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xFD);
emit_int8((unsigned char)(0xC0 | encode));
@@ -4771,7 +4845,7 @@
void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xFC);
@@ -4780,7 +4854,7 @@
void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xFD);
@@ -4808,7 +4882,7 @@
void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
@@ -4819,7 +4893,7 @@
void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
@@ -4851,7 +4925,7 @@
void Assembler::psubb(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xF8);
emit_int8((unsigned char)(0xC0 | encode));
@@ -4859,7 +4933,7 @@
void Assembler::psubw(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xF9);
emit_int8((unsigned char)(0xC0 | encode));
@@ -4882,7 +4956,7 @@
void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xF8);
@@ -4891,7 +4965,7 @@
void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xF9);
@@ -4919,7 +4993,7 @@
void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
@@ -4930,7 +5004,7 @@
void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
@@ -4962,7 +5036,7 @@
void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xD5);
emit_int8((unsigned char)(0xC0 | encode));
@@ -4978,7 +5052,7 @@
void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xD5);
@@ -5006,7 +5080,7 @@
void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
@@ -5039,7 +5113,7 @@
// Shift packed integers left by specified number of bits.
void Assembler::psllw(XMMRegister dst, int shift) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
// XMM6 is for /6 encoding: 66 0F 71 /6 ib
int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x71);
@@ -5069,7 +5143,7 @@
void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xF1);
emit_int8((unsigned char)(0xC0 | encode));
@@ -5093,7 +5167,7 @@
void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
// XMM6 is for /6 encoding: 66 0F 71 /6 ib
int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x71);
@@ -5124,7 +5198,7 @@
void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xF1);
emit_int8((unsigned char)(0xC0 | encode));
@@ -5149,7 +5223,7 @@
// Shift packed integers logically right by specified number of bits.
void Assembler::psrlw(XMMRegister dst, int shift) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
// XMM2 is for /2 encoding: 66 0F 71 /2 ib
int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x71);
@@ -5181,7 +5255,7 @@
void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xD1);
emit_int8((unsigned char)(0xC0 | encode));
@@ -5205,7 +5279,7 @@
void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
// XMM2 is for /2 encoding: 66 0F 71 /2 ib
int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x71);
@@ -5235,7 +5309,7 @@
void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xD1);
emit_int8((unsigned char)(0xC0 | encode));
@@ -5260,7 +5334,7 @@
// Shift packed integers arithmetically right by specified number of bits.
void Assembler::psraw(XMMRegister dst, int shift) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
// XMM4 is for /4 encoding: 66 0F 71 /4 ib
int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x71);
@@ -5280,7 +5354,7 @@
void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xE1);
emit_int8((unsigned char)(0xC0 | encode));
@@ -5296,7 +5370,7 @@
void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
// XMM4 is for /4 encoding: 66 0F 71 /4 ib
int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x71);
@@ -5316,7 +5390,7 @@
void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xE1);
emit_int8((unsigned char)(0xC0 | encode));
@@ -5706,7 +5780,7 @@
// duplicate 2-bytes integer data from src into 16 locations in dest
void Assembler::vpbroadcastw(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_avx2(), "");
- InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x79);
emit_int8((unsigned char)(0xC0 | encode));
@@ -6573,18 +6647,6 @@
}
}
-int Assembler::kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src, VexSimdPrefix pre,
- VexOpcode opc, InstructionAttr *attributes) {
- int nds_enc = nds->is_valid() ? nds->encoding() : 0;
- return vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), pre, opc, attributes);
-}
-
-int Assembler::kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src, VexSimdPrefix pre,
- VexOpcode opc, InstructionAttr *attributes) {
- int nds_enc = nds->is_valid() ? nds->encoding() : 0;
- return vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), pre, opc, attributes);
-}
-
void Assembler::cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
assert(VM_Version::supports_avx(), "");
assert(!VM_Version::supports_evex(), "");
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Mon Dec 14 15:53:48 2015 +0000
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Tue Dec 15 13:42:13 2015 +0000
@@ -7999,9 +7999,15 @@
XMMRegister vec1, int ae) {
ShortBranchVerifier sbv(this);
Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
+  Label COMPARE_WIDE_VECTORS_LOOP_FAILED;  // used only when _LP64 && AVX3
int stride, stride2, adr_stride, adr_stride1, adr_stride2;
+ int stride2x2 = 0x40;
Address::ScaleFactor scale, scale1, scale2;
+ if (ae != StrIntrinsicNode::LL) {
+ stride2x2 = 0x20;
+ }
+
if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
shrl(cnt2, 1);
}
@@ -8011,15 +8017,15 @@
movl(result, cnt1);
subl(cnt1, cnt2);
push(cnt1);
- cmov32(Assembler::lessEqual, cnt2, result);
+ cmov32(Assembler::lessEqual, cnt2, result); // cnt2 = min(cnt1, cnt2)
// Is the minimum length zero?
testl(cnt2, cnt2);
jcc(Assembler::zero, LENGTH_DIFF_LABEL);
if (ae == StrIntrinsicNode::LL) {
// Load first bytes
- load_unsigned_byte(result, Address(str1, 0));
- load_unsigned_byte(cnt1, Address(str2, 0));
+ load_unsigned_byte(result, Address(str1, 0)); // result = str1[0]
+ load_unsigned_byte(cnt1, Address(str2, 0)); // cnt1 = str2[0]
} else if (ae == StrIntrinsicNode::UU) {
// Load first characters
load_unsigned_short(result, Address(str1, 0));
@@ -8060,7 +8066,10 @@
assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_WIDE_TAIL, COMPARE_SMALL_STR;
Label COMPARE_WIDE_VECTORS_LOOP, COMPARE_16_CHARS, COMPARE_INDEX_CHAR;
+ Label COMPARE_WIDE_VECTORS_LOOP_AVX2;
Label COMPARE_TAIL_LONG;
+  Label COMPARE_WIDE_VECTORS_LOOP_AVX3;  // used only when _LP64 && AVX3
+
int pcmpmask = 0x19;
if (ae == StrIntrinsicNode::LL) {
pcmpmask &= ~0x01;
@@ -8123,11 +8132,40 @@
}
subl(result, stride2);
subl(cnt2, stride2);
- jccb(Assembler::zero, COMPARE_WIDE_TAIL);
+ jcc(Assembler::zero, COMPARE_WIDE_TAIL);
negptr(result);
// In a loop, compare 16-chars (32-bytes) at once using (vpxor+vptest)
bind(COMPARE_WIDE_VECTORS_LOOP);
+
+#ifdef _LP64
+  if (VM_Version::supports_avx512vlbw()) { // try a 64-byte fast loop
+ cmpl(cnt2, stride2x2);
+ jccb(Assembler::below, COMPARE_WIDE_VECTORS_LOOP_AVX2);
+    testl(cnt2, stride2x2-1);   // cnt2 holds the element count; require a multiple of stride2x2
+    jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP_AVX2);   // otherwise we cannot step the loop by stride2x2 (64 bytes) at a time
+
+ bind(COMPARE_WIDE_VECTORS_LOOP_AVX3); // the hottest loop
+ if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
+ evmovdquq(vec1, Address(str1, result, scale), Assembler::AVX_512bit);
+      evpcmpeqb(k7, vec1, Address(str2, result, scale), Assembler::AVX_512bit); // k7 == 11..11 if operands are equal, otherwise k7 has some 0s
+ } else {
+ vpmovzxbw(vec1, Address(str1, result, scale1), Assembler::AVX_512bit);
+      evpcmpeqb(k7, vec1, Address(str2, result, scale2), Assembler::AVX_512bit); // k7 == 11..11 if operands are equal, otherwise k7 has some 0s
+ }
+ kortestql(k7, k7);
+ jcc(Assembler::aboveEqual, COMPARE_WIDE_VECTORS_LOOP_FAILED); // miscompare
+ addptr(result, stride2x2); // update since we already compared at this addr
+ subl(cnt2, stride2x2); // and sub the size too
+ jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP_AVX3);
+
+ vpxor(vec1, vec1);
+ jmpb(COMPARE_WIDE_TAIL);
+ }//if (VM_Version::supports_avx512vlbw())
+#endif // _LP64
+
+
+ bind(COMPARE_WIDE_VECTORS_LOOP_AVX2);
if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
vmovdqu(vec1, Address(str1, result, scale));
vpxor(vec1, Address(str2, result, scale));
@@ -8136,7 +8174,7 @@
vpxor(vec1, Address(str2, result, scale2));
}
vptest(vec1, vec1);
- jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
+ jcc(Assembler::notZero, VECTOR_NOT_EQUAL);
addptr(result, stride2);
subl(cnt2, stride2);
jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP);
@@ -8151,7 +8189,7 @@
movl(result, stride2);
movl(cnt2, result);
negptr(result);
- jmpb(COMPARE_WIDE_VECTORS_LOOP);
+ jmp(COMPARE_WIDE_VECTORS_LOOP_AVX2);
// Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors.
bind(VECTOR_NOT_EQUAL);
@@ -8295,6 +8333,34 @@
}
jmpb(DONE_LABEL);
+#ifdef _LP64
+ if (VM_Version::supports_avx512vlbw()) {
+
+ bind(COMPARE_WIDE_VECTORS_LOOP_FAILED);
+
+ kmovql(cnt1, k7);
+ notq(cnt1);
+ bsfq(cnt2, cnt1);
+ if (ae != StrIntrinsicNode::LL) {
+ // Divide diff by 2 to get number of chars
+ sarl(cnt2, 1);
+ }
+ addq(result, cnt2);
+ if (ae == StrIntrinsicNode::LL) {
+ load_unsigned_byte(cnt1, Address(str2, result));
+ load_unsigned_byte(result, Address(str1, result));
+ } else if (ae == StrIntrinsicNode::UU) {
+ load_unsigned_short(cnt1, Address(str2, result, scale));
+ load_unsigned_short(result, Address(str1, result, scale));
+ } else {
+ load_unsigned_short(cnt1, Address(str2, result, scale2));
+ load_unsigned_byte(result, Address(str1, result, scale1));
+ }
+ subl(result, cnt1);
+ jmpb(POP_LABEL);
+ }//if (VM_Version::supports_avx512vlbw())
+#endif // _LP64
+
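The failure path above converts the comparison mask into an element index: kmovql copies k7 into a general register, notq turns the mismatching byte lanes into 1-bits, bsfq finds the lowest such bit, and for the two-byte-per-char encodings the bit index is halved before the differing elements are reloaded and subtracted. A scalar sketch of that index computation (illustrative only; the helper name is invented and a GCC/Clang-style __builtin_ctzll is assumed):

#include <cstdint>

// Illustrative sketch, not the intrinsic itself: recover the index of the
// first differing element from the byte-wise equality mask in k7.
// Precondition: at least one bit of k7 is 0 (kortestql reported a miscompare).
// bytes_per_element is 1 for LL and 2 for UU/LU/UL.
static int64_t first_mismatch_index(uint64_t k7, int bytes_per_element) {
  uint64_t miscompare = ~k7;              // notq(cnt1)
  int bit = __builtin_ctzll(miscompare);  // bsfq(cnt2, cnt1)
  return bit / bytes_per_element;         // sarl(cnt2, 1) when ae != LL
}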
// Discard the stored length difference
bind(POP_LABEL);
pop(cnt1);
@@ -8304,6 +8370,7 @@
if(ae == StrIntrinsicNode::UL) {
negl(result);
}
+
}
// Search for Non-ASCII character (Negative byte value) in a byte array,
@@ -9439,13 +9506,184 @@
pop(tmp1);
}
+void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register length, Register log2_array_indxscale,
+ Register result, Register tmp1, Register tmp2, XMMRegister rymm0, XMMRegister rymm1, XMMRegister rymm2){
+ assert(UseSSE42Intrinsics, "SSE4.2 must be enabled.");
+ Label VECTOR32_LOOP, VECTOR16_LOOP, VECTOR8_LOOP, VECTOR4_LOOP;
+ Label VECTOR16_TAIL, VECTOR8_TAIL, VECTOR4_TAIL;
+ Label VECTOR32_NOT_EQUAL, VECTOR16_NOT_EQUAL, VECTOR8_NOT_EQUAL, VECTOR4_NOT_EQUAL;
+ Label SAME_TILL_END, DONE;
+ Label BYTES_LOOP, BYTES_TAIL, BYTES_NOT_EQUAL;
+
+  // scale is in rcx in both Win64 and Unix
+ ShortBranchVerifier sbv(this);
+
+ shlq(length);
+ xorq(result, result);
+
+ cmpq(length, 8);
+ jcc(Assembler::equal, VECTOR8_LOOP);
+ jcc(Assembler::less, VECTOR4_TAIL);
+
+ if (UseAVX >= 2){
+
+ cmpq(length, 16);
+ jcc(Assembler::equal, VECTOR16_LOOP);
+ jcc(Assembler::less, VECTOR8_LOOP);
+
+ cmpq(length, 32);
+ jccb(Assembler::less, VECTOR16_TAIL);
+
+ subq(length, 32);
+ bind(VECTOR32_LOOP);
+ vmovdqu(rymm0, Address(obja, result));
+ vmovdqu(rymm1, Address(objb, result));
+ vpxor(rymm2, rymm0, rymm1, Assembler::AVX_256bit);
+ vptest(rymm2, rymm2);
+ jcc(Assembler::notZero, VECTOR32_NOT_EQUAL);//mismatch found
+ addq(result, 32);
+ subq(length, 32);
+ jccb(Assembler::greaterEqual, VECTOR32_LOOP);
+ addq(length, 32);
+ jcc(Assembler::equal, SAME_TILL_END);
+    // falling through if fewer than 32 bytes are left; close the branch here.
+
+ bind(VECTOR16_TAIL);
+ cmpq(length, 16);
+ jccb(Assembler::less, VECTOR8_TAIL);
+ bind(VECTOR16_LOOP);
+ movdqu(rymm0, Address(obja, result));
+ movdqu(rymm1, Address(objb, result));
+ vpxor(rymm2, rymm0, rymm1, Assembler::AVX_128bit);
+ ptest(rymm2, rymm2);
+ jcc(Assembler::notZero, VECTOR16_NOT_EQUAL);//mismatch found
+ addq(result, 16);
+ subq(length, 16);
+ jcc(Assembler::equal, SAME_TILL_END);
+ //falling through if less than 16 bytes left
+ } else {//regular intrinsics
+
+ cmpq(length, 16);
+ jccb(Assembler::less, VECTOR8_TAIL);
+
+ subq(length, 16);
+ bind(VECTOR16_LOOP);
+ movdqu(rymm0, Address(obja, result));
+ movdqu(rymm1, Address(objb, result));
+ pxor(rymm0, rymm1);
+ ptest(rymm0, rymm0);
+ jcc(Assembler::notZero, VECTOR16_NOT_EQUAL);//mismatch found
+ addq(result, 16);
+ subq(length, 16);
+ jccb(Assembler::greaterEqual, VECTOR16_LOOP);
+ addq(length, 16);
+ jcc(Assembler::equal, SAME_TILL_END);
+ //falling through if less than 16 bytes left
+ }
+
+ bind(VECTOR8_TAIL);
+ cmpq(length, 8);
+ jccb(Assembler::less, VECTOR4_TAIL);
+ bind(VECTOR8_LOOP);
+ movq(tmp1, Address(obja, result));
+ movq(tmp2, Address(objb, result));
+ xorq(tmp1, tmp2);
+ testq(tmp1, tmp1);
+ jcc(Assembler::notZero, VECTOR8_NOT_EQUAL);//mismatch found
+ addq(result, 8);
+ subq(length, 8);
+ jcc(Assembler::equal, SAME_TILL_END);
+ //falling through if less than 8 bytes left
+
+ bind(VECTOR4_TAIL);
+ cmpq(length, 4);
+ jccb(Assembler::less, BYTES_TAIL);
+ bind(VECTOR4_LOOP);
+ movl(tmp1, Address(obja, result));
+ xorl(tmp1, Address(objb, result));
+ testl(tmp1, tmp1);
+ jcc(Assembler::notZero, VECTOR4_NOT_EQUAL);//mismatch found
+ addq(result, 4);
+ subq(length, 4);
+ jcc(Assembler::equal, SAME_TILL_END);
+ //falling through if less than 4 bytes left
+
+ bind(BYTES_TAIL);
+ bind(BYTES_LOOP);
+ load_unsigned_byte(tmp1, Address(obja, result));
+ load_unsigned_byte(tmp2, Address(objb, result));
+ xorl(tmp1, tmp2);
+ testl(tmp1, tmp1);
+ jccb(Assembler::notZero, BYTES_NOT_EQUAL);//mismatch found
+ decq(length);
+ jccb(Assembler::zero, SAME_TILL_END);
+ incq(result);
+ load_unsigned_byte(tmp1, Address(obja, result));
+ load_unsigned_byte(tmp2, Address(objb, result));
+ xorl(tmp1, tmp2);
+ testl(tmp1, tmp1);
+ jccb(Assembler::notZero, BYTES_NOT_EQUAL);//mismatch found
+ decq(length);
+ jccb(Assembler::zero, SAME_TILL_END);
+ incq(result);
+ load_unsigned_byte(tmp1, Address(obja, result));
+ load_unsigned_byte(tmp2, Address(objb, result));
+ xorl(tmp1, tmp2);
+ testl(tmp1, tmp1);
+ jccb(Assembler::notZero, BYTES_NOT_EQUAL);//mismatch found
+ jmpb(SAME_TILL_END);
+
+ if (UseAVX >= 2){
+ bind(VECTOR32_NOT_EQUAL);
+ vpcmpeqb(rymm2, rymm2, rymm2, Assembler::AVX_256bit);
+ vpcmpeqb(rymm0, rymm0, rymm1, Assembler::AVX_256bit);
+ vpxor(rymm0, rymm0, rymm2, Assembler::AVX_256bit);
+ vpmovmskb(tmp1, rymm0);
+ bsfq(tmp1, tmp1);
+ addq(result, tmp1);
+ shrq(result);
+ jmpb(DONE);
+ }
+
+ bind(VECTOR16_NOT_EQUAL);
+ if (UseAVX >= 2){
+ vpcmpeqb(rymm2, rymm2, rymm2, Assembler::AVX_128bit);
+ vpcmpeqb(rymm0, rymm0, rymm1, Assembler::AVX_128bit);
+ pxor(rymm0, rymm2);
+ } else {
+ pcmpeqb(rymm2, rymm2);
+ pxor(rymm0, rymm1);
+ pcmpeqb(rymm0, rymm1);
+ pxor(rymm0, rymm2);
+ }
+ pmovmskb(tmp1, rymm0);
+ bsfq(tmp1, tmp1);
+ addq(result, tmp1);
+ shrq(result);
+ jmpb(DONE);
+
+ bind(VECTOR8_NOT_EQUAL);
+ bind(VECTOR4_NOT_EQUAL);
+ bsfq(tmp1, tmp1);
+ shrq(tmp1, 3);
+ addq(result, tmp1);
+ bind(BYTES_NOT_EQUAL);
+ shrq(result);
+ jmpb(DONE);
+
+ bind(SAME_TILL_END);
+ mov64(result, -1);
+
+ bind(DONE);
+}
+
+
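Internally vectorized_mismatch works on bytes: length arrives as an element count with log2_array_indxscale in rcx, shlq(length) converts it to a byte count, the loops advance result as a byte offset, and on a mismatch shrq(result) converts the offset of the first differing byte back to an element index; if every byte matches, SAME_TILL_END reports -1. A plain scalar equivalent of that result contract (illustrative only, not the intrinsic itself):

#include <cstdint>

// Illustrative scalar equivalent of the intrinsic's result contract:
// returns the element index containing the first differing byte,
// or -1 if the compared ranges are identical.
static int64_t vectorized_mismatch_model(const uint8_t* obja, const uint8_t* objb,
                                         int64_t length, int log2_scale) {
  int64_t byte_len = length << log2_scale;   // shlq(length), shift count in rcx
  for (int64_t i = 0; i < byte_len; i++) {
    if (obja[i] != objb[i]) {
      return i >> log2_scale;                // shrq(result)
    }
  }
  return -1;                                 // mov64(result, -1)
}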
//Helper functions for square_to_len()
/**
* Store the squares of x[], right shifted one bit (divided by 2) into z[]
* Preserves x and z and modifies rest of the registers.
*/
-
void MacroAssembler::square_rshift(Register x, Register xlen, Register z, Register tmp1, Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg) {
// Perform square and right shift by 1
// Handle odd xlen case first, then for even xlen do the following