8210764: Update avx512 implementation
Reviewed-by: kvn
Contributed-by: sandhya.viswanathan@intel.com
--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp Mon Sep 24 13:51:22 2018 -0700
+++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp Mon Sep 24 16:37:28 2018 -0700
@@ -2792,7 +2792,10 @@
}
-void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) {
+void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) {
+ // tmp must be unused
+ assert(tmp->is_illegal(), "wasting a register if tmp is allocated");
+
if (left->is_single_cpu()) {
assert(dest->is_single_cpu(), "expect single result reg");
__ negw(dest->as_register(), left->as_register());
--- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp Mon Sep 24 13:51:22 2018 -0700
+++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp Mon Sep 24 16:37:28 2018 -0700
@@ -3265,7 +3265,9 @@
}
-void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) {
+void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) {
+ // tmp must be unused
+ assert(tmp->is_illegal(), "wasting a register if tmp is allocated");
if (left->is_single_cpu()) {
assert (dest->type() == T_INT, "unexpected result type");
--- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp Mon Sep 24 13:51:22 2018 -0700
+++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp Mon Sep 24 16:37:28 2018 -0700
@@ -2840,7 +2840,9 @@
}
-void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) {
+void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) {
+ // tmp must be unused
+ assert(tmp->is_illegal(), "wasting a register if tmp is allocated");
assert(left->is_register(), "can only handle registers");
if (left->is_single_cpu()) {
--- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp Mon Sep 24 13:51:22 2018 -0700
+++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp Mon Sep 24 16:37:28 2018 -0700
@@ -2850,7 +2850,9 @@
ShouldNotCallThis(); // There are no delay slots on ZARCH_64.
}
-void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) {
+void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) {
+ // tmp must be unused
+ assert(tmp->is_illegal(), "wasting a register if tmp is allocated");
assert(left->is_register(), "can only handle registers");
if (left->is_single_cpu()) {
--- a/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp Mon Sep 24 13:51:22 2018 -0700
+++ b/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp Mon Sep 24 16:37:28 2018 -0700
@@ -3024,7 +3024,9 @@
}
-void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) {
+void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) {
+ // tmp must be unused
+ assert(tmp->is_illegal(), "wasting a register if tmp is allocated");
assert(left->is_register(), "can only handle registers");
if (left->is_single_cpu()) {
--- a/src/hotspot/cpu/x86/assembler_x86.cpp Mon Sep 24 13:51:22 2018 -0700
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp Mon Sep 24 16:37:28 2018 -0700
@@ -2199,7 +2199,7 @@
void Assembler::movapd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
- InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x28);
@@ -2209,7 +2209,7 @@
void Assembler::movaps(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
- InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
emit_int8(0x28);
emit_int8((unsigned char)(0xC0 | encode));
@@ -2217,7 +2217,7 @@
void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
emit_int8(0x16);
emit_int8((unsigned char)(0xC0 | encode));
@@ -2465,8 +2465,7 @@
void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
- InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x6F);
emit_int8((unsigned char)(0xC0 | encode));
@@ -2583,7 +2582,7 @@
assert(VM_Version::supports_avx512vlbw(), "");
assert(is_vector_masking(), ""); // For stub code use only
InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
@@ -2608,7 +2607,7 @@
assert(is_vector_masking(), "");
assert(VM_Version::supports_avx512vlbw(), "");
InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
@@ -2752,7 +2751,7 @@
void Assembler::movlpd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
- InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
@@ -3512,7 +3511,7 @@
void Assembler::evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
assert(VM_Version::supports_avx512vlbw(), "");
InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_is_evex_instruction();
int dst_enc = kdst->encoding();
@@ -3525,7 +3524,7 @@
assert(is_vector_masking(), "");
assert(VM_Version::supports_avx512vlbw(), "");
InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.reset_is_clear_context();
attributes.set_embedded_opmask_register_specifier(mask);
@@ -3538,7 +3537,7 @@
void Assembler::evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len) {
assert(VM_Version::supports_avx512vlbw(), "");
- InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x3E);
@@ -3549,7 +3548,7 @@
void Assembler::evpcmpuw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len) {
assert(is_vector_masking(), "");
assert(VM_Version::supports_avx512vlbw(), "");
- InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.reset_is_clear_context();
attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
@@ -3562,7 +3561,7 @@
void Assembler::evpcmpuw(KRegister kdst, XMMRegister nds, Address src, ComparisonPredicate vcc, int vector_len) {
assert(VM_Version::supports_avx512vlbw(), "");
InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_is_evex_instruction();
int dst_enc = kdst->encoding();
@@ -3575,7 +3574,7 @@
void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
assert(VM_Version::supports_avx512bw(), "");
InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_is_evex_instruction();
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
int dst_enc = kdst->encoding();
@@ -3588,7 +3587,7 @@
assert(VM_Version::supports_avx512vlbw(), "");
assert(is_vector_masking(), ""); // For stub code use only
InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_reg_mask */ false, /* uses_vl */ false);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_reg_mask */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.reset_is_clear_context();
attributes.set_embedded_opmask_register_specifier(mask);
@@ -3741,7 +3740,7 @@
void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x16);
emit_int8((unsigned char)(0xC0 | encode));
@@ -3750,7 +3749,7 @@
void Assembler::pextrd(Address dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x16);
@@ -3760,7 +3759,7 @@
void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x16);
emit_int8((unsigned char)(0xC0 | encode));
@@ -3769,7 +3768,7 @@
void Assembler::pextrq(Address dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x16);
@@ -3779,7 +3778,7 @@
void Assembler::pextrw(Register dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sse2(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xC5);
emit_int8((unsigned char)(0xC0 | encode));
@@ -3788,7 +3787,7 @@
void Assembler::pextrw(Address dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8((unsigned char)0x15);
@@ -3798,7 +3797,7 @@
void Assembler::pextrb(Address dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x14);
@@ -3808,7 +3807,7 @@
void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x22);
emit_int8((unsigned char)(0xC0 | encode));
@@ -3817,7 +3816,7 @@
void Assembler::pinsrd(XMMRegister dst, Address src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x22);
@@ -3827,7 +3826,7 @@
void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x22);
emit_int8((unsigned char)(0xC0 | encode));
@@ -3836,7 +3835,7 @@
void Assembler::pinsrq(XMMRegister dst, Address src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x22);
@@ -3846,7 +3845,7 @@
void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) {
assert(VM_Version::supports_sse2(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xC4);
emit_int8((unsigned char)(0xC0 | encode));
@@ -3855,7 +3854,7 @@
void Assembler::pinsrw(XMMRegister dst, Address src, int imm8) {
assert(VM_Version::supports_sse2(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xC4);
@@ -3865,7 +3864,7 @@
void Assembler::pinsrb(XMMRegister dst, Address src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x20);
@@ -3876,7 +3875,7 @@
void Assembler::pmovzxbw(XMMRegister dst, Address src) {
assert(VM_Version::supports_sse4_1(), "");
InstructionMark im(this);
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x30);
@@ -3885,7 +3884,7 @@
void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x30);
emit_int8((unsigned char)(0xC0 | encode));
@@ -3895,7 +3894,7 @@
assert(VM_Version::supports_avx(), "");
InstructionMark im(this);
assert(dst != xnoreg, "sanity");
- InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x30);
@@ -3906,7 +3905,7 @@
assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
vector_len == AVX_256bit? VM_Version::supports_avx2() :
vector_len == AVX_512bit? VM_Version::supports_avx512bw() : 0, "");
- InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x30);
emit_int8((unsigned char) (0xC0 | encode));
@@ -3918,7 +3917,7 @@
assert(VM_Version::supports_avx512vlbw(), "");
assert(dst != xnoreg, "sanity");
InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
@@ -3930,7 +3929,7 @@
assert(VM_Version::supports_avx512vlbw(), "");
assert(src != xnoreg, "sanity");
InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_is_evex_instruction();
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
@@ -3943,7 +3942,7 @@
assert(VM_Version::supports_avx512vlbw(), "");
assert(src != xnoreg, "sanity");
InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
attributes.reset_is_clear_context();
attributes.set_embedded_opmask_register_specifier(mask);
@@ -3957,7 +3956,7 @@
assert(VM_Version::supports_evex(), "");
assert(src != xnoreg, "sanity");
InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_QVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_is_evex_instruction();
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
@@ -3969,7 +3968,7 @@
assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
vector_len == AVX_256bit? VM_Version::supports_avx2() :
vector_len == AVX_512bit? VM_Version::supports_evex() : 0, " ");
- InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x33);
emit_int8((unsigned char)(0xC0 | encode));
@@ -4082,7 +4081,7 @@
void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_ssse3(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x00);
emit_int8((unsigned char)(0xC0 | encode));
@@ -4101,7 +4100,7 @@
void Assembler::pshufb(XMMRegister dst, Address src) {
assert(VM_Version::supports_ssse3(), "");
InstructionMark im(this);
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x00);
@@ -4147,7 +4146,7 @@
void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
assert(isByte(mode), "invalid value");
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x70);
emit_int8((unsigned char)(0xC0 | encode));
@@ -4159,7 +4158,7 @@
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
InstructionMark im(this);
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x70);
@@ -4180,7 +4179,7 @@
void Assembler::psrldq(XMMRegister dst, int shift) {
// Shift left 128 bit value in dst XMMRegister by shift number of bytes.
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x73);
emit_int8((unsigned char)(0xC0 | encode));
@@ -4190,7 +4189,7 @@
void Assembler::pslldq(XMMRegister dst, int shift) {
// Shift left 128 bit value in dst XMMRegister by shift number of bytes.
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
// XMM7 is for /7 encoding: 66 0F 73 /7 ib
int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x73);
@@ -4456,7 +4455,7 @@
void Assembler::palignr(XMMRegister dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_ssse3(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8((unsigned char)0x0F);
emit_int8((unsigned char)(0xC0 | encode));
@@ -4477,6 +4476,7 @@
void Assembler::evalignq(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x3);
emit_int8((unsigned char)(0xC0 | encode));
@@ -5591,7 +5591,7 @@
void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx() && (vector_len == 0) ||
VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x01);
emit_int8((unsigned char)(0xC0 | encode));
@@ -5600,7 +5600,7 @@
void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx() && (vector_len == 0) ||
VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x02);
emit_int8((unsigned char)(0xC0 | encode));
@@ -5650,7 +5650,7 @@
void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_sse3(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x01);
emit_int8((unsigned char)(0xC0 | encode));
@@ -5658,7 +5658,7 @@
void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_sse3(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x02);
emit_int8((unsigned char)(0xC0 | encode));
@@ -6281,6 +6281,15 @@
emit_int8((unsigned char)(0xC0 | encode));
}
+void Assembler::vpandn(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { // emits the all-register form of vpandn for the requested vector length
+ assert(UseAVX > 0, "requires some form of AVX");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); // emits the prefix; the returned value is folded into the final byte below
+ emit_int8((unsigned char)0xDF); // opcode byte
+ emit_int8((unsigned char)(0xC0 | encode)); // operand byte: 0xC0 OR'ed with the register encoding returned above
+}
+
+
void Assembler::por(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
@@ -6369,8 +6378,7 @@
void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_avx2(), "");
assert(imm8 <= 0x01, "imm8: %u", imm8);
- int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x38);
emit_int8((unsigned char)(0xC0 | encode));
@@ -6383,9 +6391,8 @@
assert(VM_Version::supports_avx2(), "");
assert(dst != xnoreg, "sanity");
assert(imm8 <= 0x01, "imm8: %u", imm8);
- int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
- InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x38);
@@ -6398,7 +6405,8 @@
void Assembler::vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_evex(), "");
assert(imm8 <= 0x03, "imm8: %u", imm8);
- InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x38);
emit_int8((unsigned char)(0xC0 | encode));
@@ -6413,10 +6421,10 @@
assert(VM_Version::supports_avx(), "");
assert(dst != xnoreg, "sanity");
assert(imm8 <= 0x03, "imm8: %u", imm8);
- int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
- InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
+ attributes.set_is_evex_instruction();
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x18);
emit_operand(dst, src);
@@ -6430,9 +6438,10 @@
void Assembler::vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_evex(), "");
assert(imm8 <= 0x01, "imm8: %u", imm8);
- InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
- emit_int8(0x38);
+ emit_int8(0x3A);
emit_int8((unsigned char)(0xC0 | encode));
// 0x00 - insert into lower 256 bits
// 0x01 - insert into upper 256 bits
@@ -6445,8 +6454,7 @@
void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_avx(), "");
assert(imm8 <= 0x01, "imm8: %u", imm8);
- int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x18);
emit_int8((unsigned char)(0xC0 | encode));
@@ -6459,9 +6467,8 @@
assert(VM_Version::supports_avx(), "");
assert(dst != xnoreg, "sanity");
assert(imm8 <= 0x01, "imm8: %u", imm8);
- int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
- InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x18);
@@ -6472,16 +6479,16 @@
}
void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
- assert(VM_Version::supports_evex(), "");
+ assert(VM_Version::supports_avx2(), "");
assert(imm8 <= 0x03, "imm8: %u", imm8);
- InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x18);
emit_int8((unsigned char)(0xC0 | encode));
// 0x00 - insert into q0 128 bits (0..127)
// 0x01 - insert into q1 128 bits (128..255)
- // 0x02 - insert into q2 128 bits (256..383)
- // 0x03 - insert into q3 128 bits (384..511)
+ // 0x02 - insert into q2 128 bits (256..383)
+ // 0x03 - insert into q3 128 bits (384..511)
emit_int8(imm8 & 0x03);
}
@@ -6489,24 +6496,24 @@
assert(VM_Version::supports_avx(), "");
assert(dst != xnoreg, "sanity");
assert(imm8 <= 0x03, "imm8: %u", imm8);
- int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
- InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x18);
emit_operand(dst, src);
// 0x00 - insert into q0 128 bits (0..127)
// 0x01 - insert into q1 128 bits (128..255)
- // 0x02 - insert into q2 128 bits (256..383)
- // 0x03 - insert into q3 128 bits (384..511)
+ // 0x02 - insert into q2 128 bits (256..383)
+ // 0x03 - insert into q3 128 bits (384..511)
emit_int8(imm8 & 0x03);
}
void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_evex(), "");
assert(imm8 <= 0x01, "imm8: %u", imm8);
- InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x1A);
emit_int8((unsigned char)(0xC0 | encode));
@@ -6520,8 +6527,9 @@
assert(dst != xnoreg, "sanity");
assert(imm8 <= 0x01, "imm8: %u", imm8);
InstructionMark im(this);
- InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit);
+ attributes.set_is_evex_instruction();
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x1A);
emit_operand(dst, src);
@@ -6534,10 +6542,9 @@
// vextracti forms
void Assembler::vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
- assert(VM_Version::supports_avx(), "");
+ assert(VM_Version::supports_avx2(), "");
assert(imm8 <= 0x01, "imm8: %u", imm8);
- int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x39);
emit_int8((unsigned char)(0xC0 | encode));
@@ -6550,9 +6557,8 @@
assert(VM_Version::supports_avx2(), "");
assert(src != xnoreg, "sanity");
assert(imm8 <= 0x01, "imm8: %u", imm8);
- int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
- InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
attributes.reset_is_clear_context();
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
@@ -6564,10 +6570,10 @@
}
void Assembler::vextracti32x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
- assert(VM_Version::supports_avx(), "");
+ assert(VM_Version::supports_evex(), "");
assert(imm8 <= 0x03, "imm8: %u", imm8);
- int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x39);
emit_int8((unsigned char)(0xC0 | encode));
@@ -6583,9 +6589,10 @@
assert(src != xnoreg, "sanity");
assert(imm8 <= 0x03, "imm8: %u", imm8);
InstructionMark im(this);
- InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
attributes.reset_is_clear_context();
+ attributes.set_is_evex_instruction();
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x39);
emit_operand(src, dst);
@@ -6599,7 +6606,8 @@
void Assembler::vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_avx512dq(), "");
assert(imm8 <= 0x03, "imm8: %u", imm8);
- InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x39);
emit_int8((unsigned char)(0xC0 | encode));
@@ -6613,7 +6621,8 @@
void Assembler::vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_evex(), "");
assert(imm8 <= 0x01, "imm8: %u", imm8);
- InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x3B);
emit_int8((unsigned char)(0xC0 | encode));
@@ -6622,14 +6631,28 @@
emit_int8(imm8 & 0x01);
}
-
+void Assembler::vextracti64x4(Address dst, XMMRegister src, uint8_t imm8) { // memory-destination form: stores the 256-bit half of src selected by imm8 to dst
+ assert(VM_Version::supports_evex(), "");
+ assert(src != xnoreg, "sanity");
+ assert(imm8 <= 0x01, "imm8: %u", imm8);
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit);
+ attributes.reset_is_clear_context();
+ attributes.set_is_evex_instruction(); // marked as a pure EVEX instruction so the prefix logic does not demote it to legacy mode
+ vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ emit_int8(0x3B); // same opcode as the register form of vextracti64x4; 0x38 is the opcode the vinserti128/vinserti32x4 emitters use
+ emit_operand(src, dst);
+ // 0x00 - extract from lower 256 bits
+ // 0x01 - extract from upper 256 bits
+ emit_int8(imm8 & 0x01);
+}
// vextractf forms
void Assembler::vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_avx(), "");
assert(imm8 <= 0x01, "imm8: %u", imm8);
- int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x19);
emit_int8((unsigned char)(0xC0 | encode));
@@ -6642,9 +6665,8 @@
assert(VM_Version::supports_avx(), "");
assert(src != xnoreg, "sanity");
assert(imm8 <= 0x01, "imm8: %u", imm8);
- int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
- InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
attributes.reset_is_clear_context();
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
@@ -6656,10 +6678,10 @@
}
void Assembler::vextractf32x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
- assert(VM_Version::supports_avx(), "");
+ assert(VM_Version::supports_evex(), "");
assert(imm8 <= 0x03, "imm8: %u", imm8);
- int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x19);
emit_int8((unsigned char)(0xC0 | encode));
@@ -6675,9 +6697,10 @@
assert(src != xnoreg, "sanity");
assert(imm8 <= 0x03, "imm8: %u", imm8);
InstructionMark im(this);
- InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
attributes.reset_is_clear_context();
+ attributes.set_is_evex_instruction();
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x19);
emit_operand(src, dst);
@@ -6691,7 +6714,8 @@
void Assembler::vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_avx512dq(), "");
assert(imm8 <= 0x03, "imm8: %u", imm8);
- InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x19);
emit_int8((unsigned char)(0xC0 | encode));
@@ -6705,7 +6729,8 @@
void Assembler::vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_evex(), "");
assert(imm8 <= 0x01, "imm8: %u", imm8);
- InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x1B);
emit_int8((unsigned char)(0xC0 | encode));
@@ -6719,9 +6744,10 @@
assert(src != xnoreg, "sanity");
assert(imm8 <= 0x01, "imm8: %u", imm8);
InstructionMark im(this);
- InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_T4,/* input_size_in_bits */ EVEX_64bit);
attributes.reset_is_clear_context();
+ attributes.set_is_evex_instruction();
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x1B);
emit_operand(src, dst);
@@ -6730,38 +6756,17 @@
emit_int8(imm8 & 0x01);
}
-
-// legacy word/dword replicate
-void Assembler::vpbroadcastw(XMMRegister dst, XMMRegister src) {
- assert(VM_Version::supports_avx2(), "");
- InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
- int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
- emit_int8(0x79);
- emit_int8((unsigned char)(0xC0 | encode));
-}
-
-void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
+// duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
+void Assembler::vpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx2(), "");
- InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
- int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
- emit_int8(0x58);
- emit_int8((unsigned char)(0xC0 | encode));
-}
-
-
-// xmm/mem sourced byte/word/dword/qword replicate
-
-// duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
-void Assembler::evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len) {
- assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x78);
emit_int8((unsigned char)(0xC0 | encode));
}
-void Assembler::evpbroadcastb(XMMRegister dst, Address src, int vector_len) {
- assert(VM_Version::supports_evex(), "");
+void Assembler::vpbroadcastb(XMMRegister dst, Address src, int vector_len) {
+ assert(VM_Version::supports_avx2(), "");
assert(dst != xnoreg, "sanity");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
@@ -6773,16 +6778,16 @@
}
// duplicate 2-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
-void Assembler::evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
- assert(VM_Version::supports_evex(), "");
+void Assembler::vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(VM_Version::supports_avx2(), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x79);
emit_int8((unsigned char)(0xC0 | encode));
}
-void Assembler::evpbroadcastw(XMMRegister dst, Address src, int vector_len) {
- assert(VM_Version::supports_evex(), "");
+void Assembler::vpbroadcastw(XMMRegister dst, Address src, int vector_len) {
+ assert(VM_Version::supports_avx2(), "");
assert(dst != xnoreg, "sanity");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
@@ -6793,17 +6798,19 @@
emit_operand(dst, src);
}
+// xmm/mem sourced byte/word/dword/qword replicate
+
// duplicate 4-byte integer data from src into programmed locations in dest : requires AVX512VL
-void Assembler::evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) {
- assert(VM_Version::supports_evex(), "");
+void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(UseAVX >= 2, "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x58);
emit_int8((unsigned char)(0xC0 | encode));
}
-void Assembler::evpbroadcastd(XMMRegister dst, Address src, int vector_len) {
- assert(VM_Version::supports_evex(), "");
+void Assembler::vpbroadcastd(XMMRegister dst, Address src, int vector_len) {
+ assert(VM_Version::supports_avx2(), "");
assert(dst != xnoreg, "sanity");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
@@ -6815,8 +6822,8 @@
}
// duplicate 8-byte integer data from src into programmed locations in dest : requires AVX512VL
-void Assembler::evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) {
- assert(VM_Version::supports_evex(), "");
+void Assembler::vpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(VM_Version::supports_avx2(), "");
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
@@ -6824,8 +6831,8 @@
emit_int8((unsigned char)(0xC0 | encode));
}
-void Assembler::evpbroadcastq(XMMRegister dst, Address src, int vector_len) {
- assert(VM_Version::supports_evex(), "");
+void Assembler::vpbroadcastq(XMMRegister dst, Address src, int vector_len) {
+ assert(VM_Version::supports_avx2(), "");
assert(dst != xnoreg, "sanity");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
@@ -6863,16 +6870,16 @@
// scalar single/double precision replicate
// duplicate single precision data from src into programmed locations in dest : requires AVX512VL
-void Assembler::evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len) {
- assert(VM_Version::supports_evex(), "");
+void Assembler::vpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x18);
emit_int8((unsigned char)(0xC0 | encode));
}
-void Assembler::evpbroadcastss(XMMRegister dst, Address src, int vector_len) {
- assert(VM_Version::supports_evex(), "");
+void Assembler::vpbroadcastss(XMMRegister dst, Address src, int vector_len) {
+ assert(VM_Version::supports_avx(), "");
assert(dst != xnoreg, "sanity");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
@@ -6884,8 +6891,8 @@
}
// duplicate double precision data from src into programmed locations in dest : requires AVX512VL
-void Assembler::evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) {
- assert(VM_Version::supports_evex(), "");
+void Assembler::vpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
@@ -6893,8 +6900,8 @@
emit_int8((unsigned char)(0xC0 | encode));
}
-void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) {
- assert(VM_Version::supports_evex(), "");
+void Assembler::vpbroadcastsd(XMMRegister dst, Address src, int vector_len) {
+ assert(VM_Version::supports_avx(), "");
assert(dst != xnoreg, "sanity");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
@@ -6911,7 +6918,7 @@
// duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
- assert(VM_Version::supports_evex(), "");
+ assert(VM_Version::supports_avx512bw(), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
@@ -6921,7 +6928,7 @@
// duplicate 2-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) {
- assert(VM_Version::supports_evex(), "");
+ assert(VM_Version::supports_avx512bw(), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
@@ -6967,7 +6974,7 @@
// Carry-Less Multiplication Quadword
void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
assert(VM_Version::supports_clmul(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x44);
emit_int8((unsigned char)(0xC0 | encode));
@@ -6977,7 +6984,7 @@
// Carry-Less Multiplication Quadword
void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
- InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x44);
emit_int8((unsigned char)(0xC0 | encode));
@@ -7597,33 +7604,23 @@
set_attributes(attributes);
attributes->set_current_assembler(this);
- // if vector length is turned off, revert to AVX for vectors smaller than 512-bit
- if (UseAVX > 2 && _legacy_mode_vl && attributes->uses_vl()) {
- switch (attributes->get_vector_len()) {
- case AVX_128bit:
- case AVX_256bit:
- attributes->set_is_legacy_mode();
- break;
+ // For EVEX instruction (which is not marked as pure EVEX instruction) check and see if this instruction
+ // is allowed in legacy mode and has resources which will fit in it.
+ // Pure EVEX instructions will have is_evex_instruction set in their definition.
+ if (!attributes->is_legacy_mode()) {
+ if (UseAVX > 2 && !attributes->is_evex_instruction() && !_is_managed) {
+ if ((attributes->get_vector_len() != AVX_512bit) && (nds_enc < 16) && (xreg_enc < 16)) {
+ attributes->set_is_legacy_mode();
+ }
}
}
- // For pure EVEX check and see if this instruction
- // is allowed in legacy mode and has resources which will
- // fit in it. Pure EVEX instructions will use set_is_evex_instruction in their definition,
- // else that field is set when we encode to EVEX
- if (UseAVX > 2 && !attributes->is_legacy_mode() &&
- !_is_managed && !attributes->is_evex_instruction()) {
- if (!_legacy_mode_vl && attributes->get_vector_len() != AVX_512bit) {
- bool check_register_bank = NOT_IA32(true) IA32_ONLY(false);
- if (check_register_bank) {
- // check nds_enc and xreg_enc for upper bank usage
- if (nds_enc < 16 && xreg_enc < 16) {
- attributes->set_is_legacy_mode();
- }
- } else {
- attributes->set_is_legacy_mode();
- }
- }
+ if (UseAVX > 2) {
+ assert(((!attributes->uses_vl()) ||
+ (attributes->get_vector_len() == AVX_512bit) ||
+ (!_legacy_mode_vl) ||
+ (attributes->is_legacy_mode())),"XMM register should be 0-15");
+ assert(((nds_enc < 16 && xreg_enc < 16) || (!attributes->is_legacy_mode())),"XMM register should be 0-15");
}
_is_managed = false;
@@ -7653,43 +7650,31 @@
bool vex_x = false;
set_attributes(attributes);
attributes->set_current_assembler(this);
- bool check_register_bank = NOT_IA32(true) IA32_ONLY(false);
-
- // if vector length is turned off, revert to AVX for vectors smaller than 512-bit
- if (UseAVX > 2 && _legacy_mode_vl && attributes->uses_vl()) {
- switch (attributes->get_vector_len()) {
- case AVX_128bit:
- case AVX_256bit:
- if (check_register_bank) {
- if (dst_enc >= 16 || nds_enc >= 16 || src_enc >= 16) {
- // up propagate arithmetic instructions to meet RA requirements
- attributes->set_vector_len(AVX_512bit);
- } else {
+
+ // For an EVEX instruction that is not marked as a pure EVEX instruction, check whether it
+ // is allowed in legacy mode and whether its resources (dst, nds and src encodings below 16) fit in it.
+ // Pure EVEX instructions have is_evex_instruction set in their definition.
+ if (!attributes->is_legacy_mode()) {
+ if (UseAVX > 2 && !attributes->is_evex_instruction() && !_is_managed) {
+ if ((!attributes->uses_vl() || (attributes->get_vector_len() != AVX_512bit)) &&
+ (dst_enc < 16) && (nds_enc < 16) && (src_enc < 16)) {
attributes->set_is_legacy_mode();
- }
- } else {
- attributes->set_is_legacy_mode();
}
- break;
}
}
- // For pure EVEX check and see if this instruction
- // is allowed in legacy mode and has resources which will
- // fit in it. Pure EVEX instructions will use set_is_evex_instruction in their definition,
- // else that field is set when we encode to EVEX
- if (UseAVX > 2 && !attributes->is_legacy_mode() &&
- !_is_managed && !attributes->is_evex_instruction()) {
- if (!_legacy_mode_vl && attributes->get_vector_len() != AVX_512bit) {
- if (check_register_bank) {
- // check dst_enc, nds_enc and src_enc for upper bank usage
- if (dst_enc < 16 && nds_enc < 16 && src_enc < 16) {
- attributes->set_is_legacy_mode();
- }
- } else {
- attributes->set_is_legacy_mode();
- }
- }
+ if (UseAVX > 2) {
+ // Scalar fp instructions (uses_vl is false) may have legacy_mode set to false.
+ // Instructions with uses_vl set to true are vector instructions.
+ // Vector instructions of AVX_512bit length may have legacy_mode set to false.
+ // Vector instructions shorter than AVX_512bit may have legacy_mode set to false only if AVX512vl() is supported.
+ // All remaining instructions must have legacy_mode set to true.
+ assert(((!attributes->uses_vl()) ||
+ (attributes->get_vector_len() == AVX_512bit) ||
+ (!_legacy_mode_vl) ||
+ (attributes->is_legacy_mode())),"XMM register should be 0-15");
+ // Instructions with legacy_mode set to true must have dst, nds and src encodings < 16
+ assert(((dst_enc < 16 && nds_enc < 16 && src_enc < 16) || (!attributes->is_legacy_mode())),"XMM register should be 0-15");
}
_is_managed = false;
@@ -7741,7 +7726,7 @@
void Assembler::cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
assert(VM_Version::supports_avx(), "");
assert(!VM_Version::supports_evex(), "");
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xC2);
emit_int8((unsigned char)(0xC0 | encode));
@@ -7751,7 +7736,7 @@
void Assembler::blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
assert(VM_Version::supports_avx(), "");
assert(!VM_Version::supports_evex(), "");
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8((unsigned char)0x4B);
emit_int8((unsigned char)(0xC0 | encode));
@@ -7762,7 +7747,7 @@
void Assembler::cmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
assert(VM_Version::supports_avx(), "");
assert(!VM_Version::supports_evex(), "");
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xC2);
emit_int8((unsigned char)(0xC0 | encode));
@@ -7772,7 +7757,7 @@
void Assembler::blendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
assert(VM_Version::supports_avx(), "");
assert(!VM_Version::supports_evex(), "");
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8((unsigned char)0x4A);
emit_int8((unsigned char)(0xC0 | encode));
@@ -7782,7 +7767,7 @@
void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
assert(VM_Version::supports_avx2(), "");
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8((unsigned char)0x02);
emit_int8((unsigned char)(0xC0 | encode));
@@ -7791,7 +7776,7 @@
void Assembler::shlxl(Register dst, Register src1, Register src2) {
assert(VM_Version::supports_bmi2(), "");
- InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8((unsigned char)0xF7);
emit_int8((unsigned char)(0xC0 | encode));
@@ -7799,7 +7784,7 @@
void Assembler::shlxq(Register dst, Register src1, Register src2) {
assert(VM_Version::supports_bmi2(), "");
- InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8((unsigned char)0xF7);
emit_int8((unsigned char)(0xC0 | encode));
--- a/src/hotspot/cpu/x86/assembler_x86.hpp Mon Sep 24 13:51:22 2018 -0700
+++ b/src/hotspot/cpu/x86/assembler_x86.hpp Mon Sep 24 16:37:28 2018 -0700
@@ -2097,6 +2097,7 @@
// Andn packed integers
void pandn(XMMRegister dst, XMMRegister src);
+ void vpandn(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
// Or packed integers
void por(XMMRegister dst, XMMRegister src);
@@ -2134,6 +2135,7 @@
void vextracti32x4(Address dst, XMMRegister src, uint8_t imm8);
void vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8);
void vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
+ void vextracti64x4(Address dst, XMMRegister src, uint8_t imm8);
// vextractf forms
void vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8);
@@ -2144,28 +2146,24 @@
void vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
void vextractf64x4(Address dst, XMMRegister src, uint8_t imm8);
- // legacy xmm sourced word/dword replicate
- void vpbroadcastw(XMMRegister dst, XMMRegister src);
- void vpbroadcastd(XMMRegister dst, XMMRegister src);
-
// xmm/mem sourced byte/word/dword/qword replicate
- void evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len);
- void evpbroadcastb(XMMRegister dst, Address src, int vector_len);
- void evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
- void evpbroadcastw(XMMRegister dst, Address src, int vector_len);
- void evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len);
- void evpbroadcastd(XMMRegister dst, Address src, int vector_len);
- void evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len);
- void evpbroadcastq(XMMRegister dst, Address src, int vector_len);
+ void vpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpbroadcastb(XMMRegister dst, Address src, int vector_len);
+ void vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpbroadcastw(XMMRegister dst, Address src, int vector_len);
+ void vpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpbroadcastd(XMMRegister dst, Address src, int vector_len);
+ void vpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpbroadcastq(XMMRegister dst, Address src, int vector_len);
void evbroadcasti64x2(XMMRegister dst, XMMRegister src, int vector_len);
void evbroadcasti64x2(XMMRegister dst, Address src, int vector_len);
// scalar single/double precision replicate
- void evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len);
- void evpbroadcastss(XMMRegister dst, Address src, int vector_len);
- void evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len);
- void evpbroadcastsd(XMMRegister dst, Address src, int vector_len);
+ void vpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpbroadcastss(XMMRegister dst, Address src, int vector_len);
+ void vpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpbroadcastsd(XMMRegister dst, Address src, int vector_len);
// gpr sourced byte/word/dword/qword replicate
void evpbroadcastb(XMMRegister dst, Register src, int vector_len);
--- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp Mon Sep 24 13:51:22 2018 -0700
+++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp Mon Sep 24 16:37:28 2018 -0700
@@ -68,7 +68,6 @@
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], (jlong)UCONST64(0x8000000000000000), (jlong)UCONST64(0x8000000000000000));
-
NEEDS_CLEANUP // remove this definitions ?
const Register IC_Klass = rax; // where the IC klass is cached
const Register SYNC_header = rax; // synchronization header
@@ -650,7 +649,7 @@
case T_FLOAT: {
if (dest->is_single_xmm()) {
- if (c->is_zero_float()) {
+ if (LP64_ONLY(UseAVX < 2 &&) c->is_zero_float()) {
__ xorps(dest->as_xmm_float_reg(), dest->as_xmm_float_reg());
} else {
__ movflt(dest->as_xmm_float_reg(),
@@ -672,7 +671,7 @@
case T_DOUBLE: {
if (dest->is_double_xmm()) {
- if (c->is_zero_double()) {
+ if (LP64_ONLY(UseAVX < 2 &&) c->is_zero_double()) {
__ xorpd(dest->as_xmm_double_reg(), dest->as_xmm_double_reg());
} else {
__ movdbl(dest->as_xmm_double_reg(),
@@ -2395,16 +2394,24 @@
}
-void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op* op) {
+void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr tmp, LIR_Opr dest, LIR_Op* op) {
if (value->is_double_xmm()) {
switch(code) {
case lir_abs :
{
- if (dest->as_xmm_double_reg() != value->as_xmm_double_reg()) {
- __ movdbl(dest->as_xmm_double_reg(), value->as_xmm_double_reg());
+#ifdef _LP64
+ if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
+ assert(tmp->is_valid(), "need temporary");
+ __ vpandn(dest->as_xmm_double_reg(), tmp->as_xmm_double_reg(), value->as_xmm_double_reg(), 2);
+ } else {
+#endif
+ if (dest->as_xmm_double_reg() != value->as_xmm_double_reg()) {
+ __ movdbl(dest->as_xmm_double_reg(), value->as_xmm_double_reg());
+ }
+ assert(!tmp->is_valid(), "do not need temporary");
+ __ andpd(dest->as_xmm_double_reg(),
+ ExternalAddress((address)double_signmask_pool));
}
- __ andpd(dest->as_xmm_double_reg(),
- ExternalAddress((address)double_signmask_pool));
}
break;
@@ -3734,7 +3741,7 @@
}
-void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) {
+void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) {
if (left->is_single_cpu()) {
__ negl(left->as_register());
move_regs(left->as_register(), dest->as_register());
@@ -3759,24 +3766,36 @@
#endif // _LP64
} else if (dest->is_single_xmm()) {
- if (left->as_xmm_float_reg() != dest->as_xmm_float_reg()) {
- __ movflt(dest->as_xmm_float_reg(), left->as_xmm_float_reg());
+#ifdef _LP64
+ if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
+ assert(tmp->is_valid(), "need temporary");
+ assert_different_registers(left->as_xmm_float_reg(), tmp->as_xmm_float_reg());
+ __ vpxor(dest->as_xmm_float_reg(), tmp->as_xmm_float_reg(), left->as_xmm_float_reg(), 2);
}
- if (UseAVX > 0) {
- __ vnegatess(dest->as_xmm_float_reg(), dest->as_xmm_float_reg(),
- ExternalAddress((address)float_signflip_pool));
- } else {
+ else
+#endif
+ {
+ assert(!tmp->is_valid(), "do not need temporary");
+ if (left->as_xmm_float_reg() != dest->as_xmm_float_reg()) {
+ __ movflt(dest->as_xmm_float_reg(), left->as_xmm_float_reg());
+ }
__ xorps(dest->as_xmm_float_reg(),
ExternalAddress((address)float_signflip_pool));
}
} else if (dest->is_double_xmm()) {
- if (left->as_xmm_double_reg() != dest->as_xmm_double_reg()) {
- __ movdbl(dest->as_xmm_double_reg(), left->as_xmm_double_reg());
+#ifdef _LP64
+ if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
+ assert(tmp->is_valid(), "need temporary");
+ assert_different_registers(left->as_xmm_double_reg(), tmp->as_xmm_double_reg());
+ __ vpxor(dest->as_xmm_double_reg(), tmp->as_xmm_double_reg(), left->as_xmm_double_reg(), 2);
}
- if (UseAVX > 0) {
- __ vnegatesd(dest->as_xmm_double_reg(), dest->as_xmm_double_reg(),
- ExternalAddress((address)double_signflip_pool));
- } else {
+ else
+#endif
+ {
+ assert(!tmp->is_valid(), "do not need temporary");
+ if (left->as_xmm_double_reg() != dest->as_xmm_double_reg()) {
+ __ movdbl(dest->as_xmm_double_reg(), left->as_xmm_double_reg());
+ }
__ xorpd(dest->as_xmm_double_reg(),
ExternalAddress((address)double_signflip_pool));
}
--- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp Mon Sep 24 13:51:22 2018 -0700
+++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp Mon Sep 24 16:37:28 2018 -0700
@@ -320,7 +320,21 @@
value.set_destroys_register();
value.load_item();
LIR_Opr reg = rlock(x);
- __ negate(value.result(), reg);
+
+ LIR_Opr tmp = LIR_OprFact::illegalOpr;
+#ifdef _LP64
+ if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
+ if (x->type()->tag() == doubleTag) {
+ tmp = new_register(T_DOUBLE);
+ __ move(LIR_OprFact::doubleConst(-0.0), tmp);
+ }
+ else if (x->type()->tag() == floatTag) {
+ tmp = new_register(T_FLOAT);
+ __ move(LIR_OprFact::floatConst(-0.0), tmp);
+ }
+ }
+#endif
+ __ negate(value.result(), reg, tmp);
set_result(x, round_item(reg));
}
@@ -748,8 +762,17 @@
LIR_Opr calc_input = value.result();
LIR_Opr calc_result = rlock_result(x);
+ LIR_Opr tmp = LIR_OprFact::illegalOpr;
+#ifdef _LP64
+ if (UseAVX > 2 && (!VM_Version::supports_avx512vl()) &&
+ (x->id() == vmIntrinsics::_dabs)) {
+ tmp = new_register(T_DOUBLE);
+ __ move(LIR_OprFact::doubleConst(-0.0), tmp);
+ }
+#endif
+
switch(x->id()) {
- case vmIntrinsics::_dabs: __ abs (calc_input, calc_result, LIR_OprFact::illegalOpr); break;
+ case vmIntrinsics::_dabs: __ abs (calc_input, calc_result, tmp); break;
case vmIntrinsics::_dsqrt: __ sqrt (calc_input, calc_result, LIR_OprFact::illegalOpr); break;
default: ShouldNotReachHere();
}
--- a/src/hotspot/cpu/x86/globals_x86.hpp Mon Sep 24 13:51:22 2018 -0700
+++ b/src/hotspot/cpu/x86/globals_x86.hpp Mon Sep 24 16:37:28 2018 -0700
@@ -119,7 +119,7 @@
product(bool, UseStoreImmI16, true, \
"Use store immediate 16-bits value instruction on x86") \
\
- product(intx, UseAVX, 2, \
+ product(intx, UseAVX, 3, \
"Highest supported AVX instructions set on x86/x64") \
range(0, 99) \
\
--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp Mon Sep 24 13:51:22 2018 -0700
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp Mon Sep 24 16:37:28 2018 -0700
@@ -2942,16 +2942,6 @@
}
}
-void MacroAssembler::push_zmm(XMMRegister reg) {
- lea(rsp, Address(rsp, -64)); // Use lea to not affect flags
- evmovdqul(Address(rsp, 0), reg, Assembler::AVX_512bit);
-}
-
-void MacroAssembler::pop_zmm(XMMRegister reg) {
- evmovdqul(reg, Address(rsp, 0), Assembler::AVX_512bit);
- lea(rsp, Address(rsp, 64)); // Use lea to not affect flags
-}
-
void MacroAssembler::fremr(Register tmp) {
save_rax(tmp);
{ Label L;
@@ -3332,27 +3322,18 @@
}
void MacroAssembler::movdqu(Address dst, XMMRegister src) {
- if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (src->encoding() > 15)) {
- Assembler::vextractf32x4(dst, src, 0);
- } else {
+ assert(((src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
Assembler::movdqu(dst, src);
- }
}
void MacroAssembler::movdqu(XMMRegister dst, Address src) {
- if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (dst->encoding() > 15)) {
- Assembler::vinsertf32x4(dst, dst, src, 0);
- } else {
+ assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
Assembler::movdqu(dst, src);
- }
}
void MacroAssembler::movdqu(XMMRegister dst, XMMRegister src) {
- if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
- Assembler::evmovdqul(dst, src, Assembler::AVX_512bit);
- } else {
+ assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
Assembler::movdqu(dst, src);
- }
}
void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg) {
@@ -3365,28 +3346,18 @@
}
void MacroAssembler::vmovdqu(Address dst, XMMRegister src) {
- if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (src->encoding() > 15)) {
- vextractf64x4_low(dst, src);
- } else {
+ assert(((src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
Assembler::vmovdqu(dst, src);
- }
}
void MacroAssembler::vmovdqu(XMMRegister dst, Address src) {
- if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (dst->encoding() > 15)) {
- vinsertf64x4_low(dst, src);
- } else {
+ assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
Assembler::vmovdqu(dst, src);
- }
}
void MacroAssembler::vmovdqu(XMMRegister dst, XMMRegister src) {
- if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
- Assembler::evmovdqul(dst, src, Assembler::AVX_512bit);
- }
- else {
+ assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
Assembler::vmovdqu(dst, src);
- }
}
void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src) {
@@ -3670,187 +3641,43 @@
}
void MacroAssembler::pcmpeqb(XMMRegister dst, XMMRegister src) {
- int dst_enc = dst->encoding();
- int src_enc = src->encoding();
- if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
- Assembler::pcmpeqb(dst, src);
- } else if ((dst_enc < 16) && (src_enc < 16)) {
- Assembler::pcmpeqb(dst, src);
- } else if (src_enc < 16) {
- push_zmm(xmm0);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::pcmpeqb(xmm0, src);
- movdqu(dst, xmm0);
- pop_zmm(xmm0);
- } else if (dst_enc < 16) {
- push_zmm(xmm0);
- evmovdqul(xmm0, src, Assembler::AVX_512bit);
- Assembler::pcmpeqb(dst, xmm0);
- pop_zmm(xmm0);
- } else {
- push_zmm(xmm0);
- push_zmm(xmm1);
- movdqu(xmm0, src);
- movdqu(xmm1, dst);
- Assembler::pcmpeqb(xmm1, xmm0);
- movdqu(dst, xmm1);
- pop_zmm(xmm1);
- pop_zmm(xmm0);
- }
+ assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+ Assembler::pcmpeqb(dst, src);
}
void MacroAssembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
- int dst_enc = dst->encoding();
- int src_enc = src->encoding();
- if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
- Assembler::pcmpeqw(dst, src);
- } else if ((dst_enc < 16) && (src_enc < 16)) {
- Assembler::pcmpeqw(dst, src);
- } else if (src_enc < 16) {
- push_zmm(xmm0);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::pcmpeqw(xmm0, src);
- movdqu(dst, xmm0);
- pop_zmm(xmm0);
- } else if (dst_enc < 16) {
- push_zmm(xmm0);
- evmovdqul(xmm0, src, Assembler::AVX_512bit);
- Assembler::pcmpeqw(dst, xmm0);
- pop_zmm(xmm0);
- } else {
- push_zmm(xmm0);
- push_zmm(xmm1);
- movdqu(xmm0, src);
- movdqu(xmm1, dst);
- Assembler::pcmpeqw(xmm1, xmm0);
- movdqu(dst, xmm1);
- pop_zmm(xmm1);
- pop_zmm(xmm0);
- }
+ assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+ Assembler::pcmpeqw(dst, src);
}
void MacroAssembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
- int dst_enc = dst->encoding();
- if (dst_enc < 16) {
- Assembler::pcmpestri(dst, src, imm8);
- } else {
- push_zmm(xmm0);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::pcmpestri(xmm0, src, imm8);
- movdqu(dst, xmm0);
- pop_zmm(xmm0);
- }
+ assert((dst->encoding() < 16),"XMM register should be 0-15");
+ Assembler::pcmpestri(dst, src, imm8);
}
void MacroAssembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
- int dst_enc = dst->encoding();
- int src_enc = src->encoding();
- if ((dst_enc < 16) && (src_enc < 16)) {
- Assembler::pcmpestri(dst, src, imm8);
- } else if (src_enc < 16) {
- push_zmm(xmm0);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::pcmpestri(xmm0, src, imm8);
- movdqu(dst, xmm0);
- pop_zmm(xmm0);
- } else if (dst_enc < 16) {
- push_zmm(xmm0);
- evmovdqul(xmm0, src, Assembler::AVX_512bit);
- Assembler::pcmpestri(dst, xmm0, imm8);
- pop_zmm(xmm0);
- } else {
- push_zmm(xmm0);
- push_zmm(xmm1);
- movdqu(xmm0, src);
- movdqu(xmm1, dst);
- Assembler::pcmpestri(xmm1, xmm0, imm8);
- movdqu(dst, xmm1);
- pop_zmm(xmm1);
- pop_zmm(xmm0);
- }
+ assert((dst->encoding() < 16 && src->encoding() < 16),"XMM register should be 0-15");
+ Assembler::pcmpestri(dst, src, imm8);
}
void MacroAssembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
- int dst_enc = dst->encoding();
- int src_enc = src->encoding();
- if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
- Assembler::pmovzxbw(dst, src);
- } else if ((dst_enc < 16) && (src_enc < 16)) {
- Assembler::pmovzxbw(dst, src);
- } else if (src_enc < 16) {
- push_zmm(xmm0);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::pmovzxbw(xmm0, src);
- movdqu(dst, xmm0);
- pop_zmm(xmm0);
- } else if (dst_enc < 16) {
- push_zmm(xmm0);
- evmovdqul(xmm0, src, Assembler::AVX_512bit);
- Assembler::pmovzxbw(dst, xmm0);
- pop_zmm(xmm0);
- } else {
- push_zmm(xmm0);
- push_zmm(xmm1);
- movdqu(xmm0, src);
- movdqu(xmm1, dst);
- Assembler::pmovzxbw(xmm1, xmm0);
- movdqu(dst, xmm1);
- pop_zmm(xmm1);
- pop_zmm(xmm0);
- }
+ assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+ Assembler::pmovzxbw(dst, src);
}
void MacroAssembler::pmovzxbw(XMMRegister dst, Address src) {
- int dst_enc = dst->encoding();
- if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
- Assembler::pmovzxbw(dst, src);
- } else if (dst_enc < 16) {
- Assembler::pmovzxbw(dst, src);
- } else {
- push_zmm(xmm0);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::pmovzxbw(xmm0, src);
- movdqu(dst, xmm0);
- pop_zmm(xmm0);
- }
+ assert(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+ Assembler::pmovzxbw(dst, src);
}
void MacroAssembler::pmovmskb(Register dst, XMMRegister src) {
- int src_enc = src->encoding();
- if (src_enc < 16) {
- Assembler::pmovmskb(dst, src);
- } else {
- push_zmm(xmm0);
- evmovdqul(xmm0, src, Assembler::AVX_512bit);
- Assembler::pmovmskb(dst, xmm0);
- pop_zmm(xmm0);
- }
+ assert((src->encoding() < 16),"XMM register should be 0-15");
+ Assembler::pmovmskb(dst, src);
}
void MacroAssembler::ptest(XMMRegister dst, XMMRegister src) {
- int dst_enc = dst->encoding();
- int src_enc = src->encoding();
- if ((dst_enc < 16) && (src_enc < 16)) {
- Assembler::ptest(dst, src);
- } else if (src_enc < 16) {
- push_zmm(xmm0);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::ptest(xmm0, src);
- pop_zmm(xmm0);
- } else if (dst_enc < 16) {
- push_zmm(xmm0);
- evmovdqul(xmm0, src, Assembler::AVX_512bit);
- Assembler::ptest(dst, xmm0);
- pop_zmm(xmm0);
- } else {
- push_zmm(xmm0);
- push_zmm(xmm1);
- movdqu(xmm0, src);
- movdqu(xmm1, dst);
- Assembler::ptest(xmm1, xmm0);
- pop_zmm(xmm1);
- pop_zmm(xmm0);
- }
+ assert((dst->encoding() < 16 && src->encoding() < 16),"XMM register should be 0-15");
+ Assembler::ptest(dst, src);
}
void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) {
@@ -3979,194 +3806,33 @@
}
void MacroAssembler::vabsss(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len) {
- int dst_enc = dst->encoding();
- int nds_enc = nds->encoding();
- int src_enc = src->encoding();
- if ((dst_enc < 16) && (nds_enc < 16)) {
- vandps(dst, nds, negate_field, vector_len);
- } else if ((src_enc < 16) && (dst_enc < 16)) {
- // Use src scratch register
- evmovdqul(src, nds, Assembler::AVX_512bit);
- vandps(dst, src, negate_field, vector_len);
- } else if (dst_enc < 16) {
- evmovdqul(dst, nds, Assembler::AVX_512bit);
- vandps(dst, dst, negate_field, vector_len);
- } else if (nds_enc < 16) {
- vandps(nds, nds, negate_field, vector_len);
- evmovdqul(dst, nds, Assembler::AVX_512bit);
- } else if (src_enc < 16) {
- evmovdqul(src, nds, Assembler::AVX_512bit);
- vandps(src, src, negate_field, vector_len);
- evmovdqul(dst, src, Assembler::AVX_512bit);
- } else {
- if (src_enc != dst_enc) {
- // Use src scratch register
- evmovdqul(src, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, nds, Assembler::AVX_512bit);
- vandps(xmm0, xmm0, negate_field, vector_len);
- evmovdqul(dst, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, src, Assembler::AVX_512bit);
- } else {
- push_zmm(xmm0);
- evmovdqul(xmm0, nds, Assembler::AVX_512bit);
- vandps(xmm0, xmm0, negate_field, vector_len);
- evmovdqul(dst, xmm0, Assembler::AVX_512bit);
- pop_zmm(xmm0);
- }
- }
+ assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq()),"XMM register should be 0-15");
+ vandps(dst, nds, negate_field, vector_len);
}
void MacroAssembler::vabssd(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len) {
- int dst_enc = dst->encoding();
- int nds_enc = nds->encoding();
- int src_enc = src->encoding();
- if ((dst_enc < 16) && (nds_enc < 16)) {
- vandpd(dst, nds, negate_field, vector_len);
- } else if ((src_enc < 16) && (dst_enc < 16)) {
- // Use src scratch register
- evmovdqul(src, nds, Assembler::AVX_512bit);
- vandpd(dst, src, negate_field, vector_len);
- } else if (dst_enc < 16) {
- evmovdqul(dst, nds, Assembler::AVX_512bit);
- vandpd(dst, dst, negate_field, vector_len);
- } else if (nds_enc < 16) {
- vandpd(nds, nds, negate_field, vector_len);
- evmovdqul(dst, nds, Assembler::AVX_512bit);
- } else if (src_enc < 16) {
- evmovdqul(src, nds, Assembler::AVX_512bit);
- vandpd(src, src, negate_field, vector_len);
- evmovdqul(dst, src, Assembler::AVX_512bit);
- } else {
- if (src_enc != dst_enc) {
- evmovdqul(src, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, nds, Assembler::AVX_512bit);
- vandpd(xmm0, xmm0, negate_field, vector_len);
- evmovdqul(dst, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, src, Assembler::AVX_512bit);
- } else {
- push_zmm(xmm0);
- evmovdqul(xmm0, nds, Assembler::AVX_512bit);
- vandpd(xmm0, xmm0, negate_field, vector_len);
- evmovdqul(dst, xmm0, Assembler::AVX_512bit);
- pop_zmm(xmm0);
- }
- }
+ assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq()),"XMM register should be 0-15");
+ vandpd(dst, nds, negate_field, vector_len);
}
void MacroAssembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
- int dst_enc = dst->encoding();
- int nds_enc = nds->encoding();
- int src_enc = src->encoding();
- if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
- Assembler::vpaddb(dst, nds, src, vector_len);
- } else if ((dst_enc < 16) && (src_enc < 16)) {
- Assembler::vpaddb(dst, dst, src, vector_len);
- } else if ((dst_enc < 16) && (nds_enc < 16)) {
- // use nds as scratch for src
- evmovdqul(nds, src, Assembler::AVX_512bit);
- Assembler::vpaddb(dst, dst, nds, vector_len);
- } else if ((src_enc < 16) && (nds_enc < 16)) {
- // use nds as scratch for dst
- evmovdqul(nds, dst, Assembler::AVX_512bit);
- Assembler::vpaddb(nds, nds, src, vector_len);
- evmovdqul(dst, nds, Assembler::AVX_512bit);
- } else if (dst_enc < 16) {
- // use nds as scatch for xmm0 to hold src
- evmovdqul(nds, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, src, Assembler::AVX_512bit);
- Assembler::vpaddb(dst, dst, xmm0, vector_len);
- evmovdqul(xmm0, nds, Assembler::AVX_512bit);
- } else {
- // worse case scenario, all regs are in the upper bank
- push_zmm(xmm1);
- evmovdqul(nds, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm1, src, Assembler::AVX_512bit);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::vpaddb(xmm0, xmm0, xmm1, vector_len);
- evmovdqul(dst, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, nds, Assembler::AVX_512bit);
- pop_zmm(xmm1);
- }
+ assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+ Assembler::vpaddb(dst, nds, src, vector_len);
}
void MacroAssembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
- int dst_enc = dst->encoding();
- int nds_enc = nds->encoding();
- if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
- Assembler::vpaddb(dst, nds, src, vector_len);
- } else if (dst_enc < 16) {
- Assembler::vpaddb(dst, dst, src, vector_len);
- } else if (nds_enc < 16) {
- // implies dst_enc in upper bank with src as scratch
- evmovdqul(nds, dst, Assembler::AVX_512bit);
- Assembler::vpaddb(nds, nds, src, vector_len);
- evmovdqul(dst, nds, Assembler::AVX_512bit);
- } else {
- // worse case scenario, all regs in upper bank
- evmovdqul(nds, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::vpaddb(xmm0, xmm0, src, vector_len);
- evmovdqul(dst, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, nds, Assembler::AVX_512bit);
- }
+ assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+ Assembler::vpaddb(dst, nds, src, vector_len);
}
void MacroAssembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
- int dst_enc = dst->encoding();
- int nds_enc = nds->encoding();
- int src_enc = src->encoding();
- if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
- Assembler::vpaddw(dst, nds, src, vector_len);
- } else if ((dst_enc < 16) && (src_enc < 16)) {
- Assembler::vpaddw(dst, dst, src, vector_len);
- } else if ((dst_enc < 16) && (nds_enc < 16)) {
- // use nds as scratch for src
- evmovdqul(nds, src, Assembler::AVX_512bit);
- Assembler::vpaddw(dst, dst, nds, vector_len);
- } else if ((src_enc < 16) && (nds_enc < 16)) {
- // use nds as scratch for dst
- evmovdqul(nds, dst, Assembler::AVX_512bit);
- Assembler::vpaddw(nds, nds, src, vector_len);
- evmovdqul(dst, nds, Assembler::AVX_512bit);
- } else if (dst_enc < 16) {
- // use nds as scatch for xmm0 to hold src
- evmovdqul(nds, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, src, Assembler::AVX_512bit);
- Assembler::vpaddw(dst, dst, xmm0, vector_len);
- evmovdqul(xmm0, nds, Assembler::AVX_512bit);
- } else {
- // worse case scenario, all regs are in the upper bank
- push_zmm(xmm1);
- evmovdqul(nds, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm1, src, Assembler::AVX_512bit);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::vpaddw(xmm0, xmm0, xmm1, vector_len);
- evmovdqul(dst, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, nds, Assembler::AVX_512bit);
- pop_zmm(xmm1);
- }
+ assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+ Assembler::vpaddw(dst, nds, src, vector_len);
}
void MacroAssembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
- int dst_enc = dst->encoding();
- int nds_enc = nds->encoding();
- if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
- Assembler::vpaddw(dst, nds, src, vector_len);
- } else if (dst_enc < 16) {
- Assembler::vpaddw(dst, dst, src, vector_len);
- } else if (nds_enc < 16) {
- // implies dst_enc in upper bank with nds as scratch
- evmovdqul(nds, dst, Assembler::AVX_512bit);
- Assembler::vpaddw(nds, nds, src, vector_len);
- evmovdqul(dst, nds, Assembler::AVX_512bit);
- } else {
- // worse case scenario, all regs in upper bank
- evmovdqul(nds, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::vpaddw(xmm0, xmm0, src, vector_len);
- evmovdqul(dst, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, nds, Assembler::AVX_512bit);
- }
+ assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+ Assembler::vpaddw(dst, nds, src, vector_len); // memory-source form: only dst and nds are range-checked by the assert above
}
void MacroAssembler::vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
@@ -4178,627 +3844,109 @@
}
}
-void MacroAssembler::vpbroadcastw(XMMRegister dst, XMMRegister src) {
- int dst_enc = dst->encoding();
- int src_enc = src->encoding();
- if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
- Assembler::vpbroadcastw(dst, src);
- } else if ((dst_enc < 16) && (src_enc < 16)) {
- Assembler::vpbroadcastw(dst, src);
- } else if (src_enc < 16) {
- push_zmm(xmm0);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::vpbroadcastw(xmm0, src);
- movdqu(dst, xmm0);
- pop_zmm(xmm0);
- } else if (dst_enc < 16) {
- push_zmm(xmm0);
- evmovdqul(xmm0, src, Assembler::AVX_512bit);
- Assembler::vpbroadcastw(dst, xmm0);
- pop_zmm(xmm0);
- } else {
- push_zmm(xmm0);
- push_zmm(xmm1);
- movdqu(xmm0, src);
- movdqu(xmm1, dst);
- Assembler::vpbroadcastw(xmm1, xmm0);
- movdqu(dst, xmm1);
- pop_zmm(xmm1);
- pop_zmm(xmm0);
- }
+void MacroAssembler::vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+ Assembler::vpbroadcastw(dst, src, vector_len); // vector_len is forwarded to the Assembler exactly as the caller supplied it
}
void MacroAssembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
- int dst_enc = dst->encoding();
- int nds_enc = nds->encoding();
- int src_enc = src->encoding();
- assert(dst_enc == nds_enc, "");
- if ((dst_enc < 16) && (src_enc < 16)) {
- Assembler::vpcmpeqb(dst, nds, src, vector_len);
- } else if (src_enc < 16) {
- push_zmm(xmm0);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::vpcmpeqb(xmm0, xmm0, src, vector_len);
- movdqu(dst, xmm0);
- pop_zmm(xmm0);
- } else if (dst_enc < 16) {
- push_zmm(xmm0);
- evmovdqul(xmm0, src, Assembler::AVX_512bit);
- Assembler::vpcmpeqb(dst, dst, xmm0, vector_len);
- pop_zmm(xmm0);
- } else {
- push_zmm(xmm0);
- push_zmm(xmm1);
- movdqu(xmm0, src);
- movdqu(xmm1, dst);
- Assembler::vpcmpeqb(xmm1, xmm1, xmm0, vector_len);
- movdqu(dst, xmm1);
- pop_zmm(xmm1);
- pop_zmm(xmm0);
- }
+ assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+ Assembler::vpcmpeqb(dst, nds, src, vector_len); // nds is forwarded unchanged; this wrapper does not require it to equal dst
}
void MacroAssembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
- int dst_enc = dst->encoding();
- int nds_enc = nds->encoding();
- int src_enc = src->encoding();
- assert(dst_enc == nds_enc, "");
- if ((dst_enc < 16) && (src_enc < 16)) {
- Assembler::vpcmpeqw(dst, nds, src, vector_len);
- } else if (src_enc < 16) {
- push_zmm(xmm0);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::vpcmpeqw(xmm0, xmm0, src, vector_len);
- movdqu(dst, xmm0);
- pop_zmm(xmm0);
- } else if (dst_enc < 16) {
- push_zmm(xmm0);
- evmovdqul(xmm0, src, Assembler::AVX_512bit);
- Assembler::vpcmpeqw(dst, dst, xmm0, vector_len);
- pop_zmm(xmm0);
- } else {
- push_zmm(xmm0);
- push_zmm(xmm1);
- movdqu(xmm0, src);
- movdqu(xmm1, dst);
- Assembler::vpcmpeqw(xmm1, xmm1, xmm0, vector_len);
- movdqu(dst, xmm1);
- pop_zmm(xmm1);
- pop_zmm(xmm0);
- }
+ assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+ Assembler::vpcmpeqw(dst, nds, src, vector_len); // nds is forwarded unchanged; this wrapper does not require it to equal dst
}
void MacroAssembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
- int dst_enc = dst->encoding();
- if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
- Assembler::vpmovzxbw(dst, src, vector_len);
- } else if (dst_enc < 16) {
- Assembler::vpmovzxbw(dst, src, vector_len);
- } else {
- push_zmm(xmm0);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::vpmovzxbw(xmm0, src, vector_len);
- movdqu(dst, xmm0);
- pop_zmm(xmm0);
- }
+ assert(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+ Assembler::vpmovzxbw(dst, src, vector_len); // only dst is range-checked above; src is a memory operand
}
void MacroAssembler::vpmovmskb(Register dst, XMMRegister src) {
- int src_enc = src->encoding();
- if (src_enc < 16) {
- Assembler::vpmovmskb(dst, src);
- } else {
- push_zmm(xmm0);
- evmovdqul(xmm0, src, Assembler::AVX_512bit);
- Assembler::vpmovmskb(dst, xmm0);
- pop_zmm(xmm0);
- }
+ assert((src->encoding() < 16),"XMM register should be 0-15");
+ Assembler::vpmovmskb(dst, src); // the assert above has no VM_Version alternative: src must always encode below 16
}
void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
- int dst_enc = dst->encoding();
- int nds_enc = nds->encoding();
- int src_enc = src->encoding();
- if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
- Assembler::vpmullw(dst, nds, src, vector_len);
- } else if ((dst_enc < 16) && (src_enc < 16)) {
- Assembler::vpmullw(dst, dst, src, vector_len);
- } else if ((dst_enc < 16) && (nds_enc < 16)) {
- // use nds as scratch for src
- evmovdqul(nds, src, Assembler::AVX_512bit);
- Assembler::vpmullw(dst, dst, nds, vector_len);
- } else if ((src_enc < 16) && (nds_enc < 16)) {
- // use nds as scratch for dst
- evmovdqul(nds, dst, Assembler::AVX_512bit);
- Assembler::vpmullw(nds, nds, src, vector_len);
- evmovdqul(dst, nds, Assembler::AVX_512bit);
- } else if (dst_enc < 16) {
- // use nds as scatch for xmm0 to hold src
- evmovdqul(nds, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, src, Assembler::AVX_512bit);
- Assembler::vpmullw(dst, dst, xmm0, vector_len);
- evmovdqul(xmm0, nds, Assembler::AVX_512bit);
- } else {
- // worse case scenario, all regs are in the upper bank
- push_zmm(xmm1);
- evmovdqul(nds, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm1, src, Assembler::AVX_512bit);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::vpmullw(xmm0, xmm0, xmm1, vector_len);
- evmovdqul(dst, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, nds, Assembler::AVX_512bit);
- pop_zmm(xmm1);
- }
+ assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+ Assembler::vpmullw(dst, nds, src, vector_len); // no fallback path: callers must pass registers that satisfy the assert above
}
void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
- int dst_enc = dst->encoding();
- int nds_enc = nds->encoding();
- if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
- Assembler::vpmullw(dst, nds, src, vector_len);
- } else if (dst_enc < 16) {
- Assembler::vpmullw(dst, dst, src, vector_len);
- } else if (nds_enc < 16) {
- // implies dst_enc in upper bank with src as scratch
- evmovdqul(nds, dst, Assembler::AVX_512bit);
- Assembler::vpmullw(nds, nds, src, vector_len);
- evmovdqul(dst, nds, Assembler::AVX_512bit);
- } else {
- // worse case scenario, all regs in upper bank
- evmovdqul(nds, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::vpmullw(xmm0, xmm0, src, vector_len);
- evmovdqul(dst, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, nds, Assembler::AVX_512bit);
- }
+ assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+ Assembler::vpmullw(dst, nds, src, vector_len); // memory-source form: only dst and nds are range-checked by the assert above
}
void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
- int dst_enc = dst->encoding();
- int nds_enc = nds->encoding();
- int src_enc = src->encoding();
- if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
- Assembler::vpsubb(dst, nds, src, vector_len);
- } else if ((dst_enc < 16) && (src_enc < 16)) {
- Assembler::vpsubb(dst, dst, src, vector_len);
- } else if ((dst_enc < 16) && (nds_enc < 16)) {
- // use nds as scratch for src
- evmovdqul(nds, src, Assembler::AVX_512bit);
- Assembler::vpsubb(dst, dst, nds, vector_len);
- } else if ((src_enc < 16) && (nds_enc < 16)) {
- // use nds as scratch for dst
- evmovdqul(nds, dst, Assembler::AVX_512bit);
- Assembler::vpsubb(nds, nds, src, vector_len);
- evmovdqul(dst, nds, Assembler::AVX_512bit);
- } else if (dst_enc < 16) {
- // use nds as scatch for xmm0 to hold src
- evmovdqul(nds, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, src, Assembler::AVX_512bit);
- Assembler::vpsubb(dst, dst, xmm0, vector_len);
- evmovdqul(xmm0, nds, Assembler::AVX_512bit);
- } else {
- // worse case scenario, all regs are in the upper bank
- push_zmm(xmm1);
- evmovdqul(nds, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm1, src, Assembler::AVX_512bit);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::vpsubb(xmm0, xmm0, xmm1, vector_len);
- evmovdqul(dst, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, nds, Assembler::AVX_512bit);
- pop_zmm(xmm1);
- }
+ assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+ Assembler::vpsubb(dst, nds, src, vector_len); // no fallback path: callers must pass registers that satisfy the assert above
}
void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
- int dst_enc = dst->encoding();
- int nds_enc = nds->encoding();
- if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
- Assembler::vpsubb(dst, nds, src, vector_len);
- } else if (dst_enc < 16) {
- Assembler::vpsubb(dst, dst, src, vector_len);
- } else if (nds_enc < 16) {
- // implies dst_enc in upper bank with src as scratch
- evmovdqul(nds, dst, Assembler::AVX_512bit);
- Assembler::vpsubb(nds, nds, src, vector_len);
- evmovdqul(dst, nds, Assembler::AVX_512bit);
- } else {
- // worse case scenario, all regs in upper bank
- evmovdqul(nds, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::vpsubb(xmm0, xmm0, src, vector_len);
- evmovdqul(dst, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, nds, Assembler::AVX_512bit);
- }
+ assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+ Assembler::vpsubb(dst, nds, src, vector_len); // memory-source form: only dst and nds are range-checked by the assert above
}
void MacroAssembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
- int dst_enc = dst->encoding();
- int nds_enc = nds->encoding();
- int src_enc = src->encoding();
- if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
- Assembler::vpsubw(dst, nds, src, vector_len);
- } else if ((dst_enc < 16) && (src_enc < 16)) {
- Assembler::vpsubw(dst, dst, src, vector_len);
- } else if ((dst_enc < 16) && (nds_enc < 16)) {
- // use nds as scratch for src
- evmovdqul(nds, src, Assembler::AVX_512bit);
- Assembler::vpsubw(dst, dst, nds, vector_len);
- } else if ((src_enc < 16) && (nds_enc < 16)) {
- // use nds as scratch for dst
- evmovdqul(nds, dst, Assembler::AVX_512bit);
- Assembler::vpsubw(nds, nds, src, vector_len);
- evmovdqul(dst, nds, Assembler::AVX_512bit);
- } else if (dst_enc < 16) {
- // use nds as scatch for xmm0 to hold src
- evmovdqul(nds, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, src, Assembler::AVX_512bit);
- Assembler::vpsubw(dst, dst, xmm0, vector_len);
- evmovdqul(xmm0, nds, Assembler::AVX_512bit);
- } else {
- // worse case scenario, all regs are in the upper bank
- push_zmm(xmm1);
- evmovdqul(nds, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm1, src, Assembler::AVX_512bit);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::vpsubw(xmm0, xmm0, xmm1, vector_len);
- evmovdqul(dst, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, nds, Assembler::AVX_512bit);
- pop_zmm(xmm1);
- }
+ assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+ Assembler::vpsubw(dst, nds, src, vector_len); // no fallback path: callers must pass registers that satisfy the assert above
}
void MacroAssembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
- int dst_enc = dst->encoding();
- int nds_enc = nds->encoding();
- if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
- Assembler::vpsubw(dst, nds, src, vector_len);
- } else if (dst_enc < 16) {
- Assembler::vpsubw(dst, dst, src, vector_len);
- } else if (nds_enc < 16) {
- // implies dst_enc in upper bank with src as scratch
- evmovdqul(nds, dst, Assembler::AVX_512bit);
- Assembler::vpsubw(nds, nds, src, vector_len);
- evmovdqul(dst, nds, Assembler::AVX_512bit);
- } else {
- // worse case scenario, all regs in upper bank
- evmovdqul(nds, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::vpsubw(xmm0, xmm0, src, vector_len);
- evmovdqul(dst, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, nds, Assembler::AVX_512bit);
- }
+ assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+ Assembler::vpsubw(dst, nds, src, vector_len); // memory-source form: only dst and nds are range-checked by the assert above
}
void MacroAssembler::vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
- int dst_enc = dst->encoding();
- int nds_enc = nds->encoding();
- int shift_enc = shift->encoding();
- if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
- Assembler::vpsraw(dst, nds, shift, vector_len);
- } else if ((dst_enc < 16) && (shift_enc < 16)) {
- Assembler::vpsraw(dst, dst, shift, vector_len);
- } else if ((dst_enc < 16) && (nds_enc < 16)) {
- // use nds as scratch with shift
- evmovdqul(nds, shift, Assembler::AVX_512bit);
- Assembler::vpsraw(dst, dst, nds, vector_len);
- } else if ((shift_enc < 16) && (nds_enc < 16)) {
- // use nds as scratch with dst
- evmovdqul(nds, dst, Assembler::AVX_512bit);
- Assembler::vpsraw(nds, nds, shift, vector_len);
- evmovdqul(dst, nds, Assembler::AVX_512bit);
- } else if (dst_enc < 16) {
- // use nds to save a copy of xmm0 and hold shift
- evmovdqul(nds, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, shift, Assembler::AVX_512bit);
- Assembler::vpsraw(dst, dst, xmm0, vector_len);
- evmovdqul(xmm0, nds, Assembler::AVX_512bit);
- } else if (nds_enc < 16) {
- // use nds and dst as temps
- evmovdqul(nds, dst, Assembler::AVX_512bit);
- evmovdqul(dst, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, shift, Assembler::AVX_512bit);
- Assembler::vpsraw(nds, nds, xmm0, vector_len);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- evmovdqul(dst, nds, Assembler::AVX_512bit);
- } else {
- // worse case scenario, all regs are in the upper bank
- push_zmm(xmm1);
- evmovdqul(nds, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm1, shift, Assembler::AVX_512bit);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::vpsraw(xmm0, xmm0, xmm1, vector_len);
- evmovdqul(dst, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, nds, Assembler::AVX_512bit);
- pop_zmm(xmm1);
- }
+ assert(((dst->encoding() < 16 && shift->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+ Assembler::vpsraw(dst, nds, shift, vector_len); // register-count form: the shift register is range-checked together with dst and nds
}
void MacroAssembler::vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
- int dst_enc = dst->encoding();
- int nds_enc = nds->encoding();
- if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
- Assembler::vpsraw(dst, nds, shift, vector_len);
- } else if (dst_enc < 16) {
- Assembler::vpsraw(dst, dst, shift, vector_len);
- } else if (nds_enc < 16) {
- // use nds as scratch
- evmovdqul(nds, dst, Assembler::AVX_512bit);
- Assembler::vpsraw(nds, nds, shift, vector_len);
- evmovdqul(dst, nds, Assembler::AVX_512bit);
- } else {
- // use nds as scratch for xmm0
- evmovdqul(nds, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::vpsraw(xmm0, xmm0, shift, vector_len);
- evmovdqul(dst, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, nds, Assembler::AVX_512bit);
- }
+ assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+ Assembler::vpsraw(dst, nds, shift, vector_len); // integer-count form: only dst and nds are range-checked by the assert above
}
void MacroAssembler::vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
- int dst_enc = dst->encoding();
- int nds_enc = nds->encoding();
- int shift_enc = shift->encoding();
- if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
- Assembler::vpsrlw(dst, nds, shift, vector_len);
- } else if ((dst_enc < 16) && (shift_enc < 16)) {
- Assembler::vpsrlw(dst, dst, shift, vector_len);
- } else if ((dst_enc < 16) && (nds_enc < 16)) {
- // use nds as scratch with shift
- evmovdqul(nds, shift, Assembler::AVX_512bit);
- Assembler::vpsrlw(dst, dst, nds, vector_len);
- } else if ((shift_enc < 16) && (nds_enc < 16)) {
- // use nds as scratch with dst
- evmovdqul(nds, dst, Assembler::AVX_512bit);
- Assembler::vpsrlw(nds, nds, shift, vector_len);
- evmovdqul(dst, nds, Assembler::AVX_512bit);
- } else if (dst_enc < 16) {
- // use nds to save a copy of xmm0 and hold shift
- evmovdqul(nds, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, shift, Assembler::AVX_512bit);
- Assembler::vpsrlw(dst, dst, xmm0, vector_len);
- evmovdqul(xmm0, nds, Assembler::AVX_512bit);
- } else if (nds_enc < 16) {
- // use nds and dst as temps
- evmovdqul(nds, dst, Assembler::AVX_512bit);
- evmovdqul(dst, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, shift, Assembler::AVX_512bit);
- Assembler::vpsrlw(nds, nds, xmm0, vector_len);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- evmovdqul(dst, nds, Assembler::AVX_512bit);
- } else {
- // worse case scenario, all regs are in the upper bank
- push_zmm(xmm1);
- evmovdqul(nds, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm1, shift, Assembler::AVX_512bit);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::vpsrlw(xmm0, xmm0, xmm1, vector_len);
- evmovdqul(dst, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, nds, Assembler::AVX_512bit);
- pop_zmm(xmm1);
- }
+ assert(((dst->encoding() < 16 && shift->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+ Assembler::vpsrlw(dst, nds, shift, vector_len); // register-count form: the shift register is range-checked together with dst and nds
}
void MacroAssembler::vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
- int dst_enc = dst->encoding();
- int nds_enc = nds->encoding();
- if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
- Assembler::vpsrlw(dst, nds, shift, vector_len);
- } else if (dst_enc < 16) {
- Assembler::vpsrlw(dst, dst, shift, vector_len);
- } else if (nds_enc < 16) {
- // use nds as scratch
- evmovdqul(nds, dst, Assembler::AVX_512bit);
- Assembler::vpsrlw(nds, nds, shift, vector_len);
- evmovdqul(dst, nds, Assembler::AVX_512bit);
- } else {
- // use nds as scratch for xmm0
- evmovdqul(nds, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::vpsrlw(xmm0, xmm0, shift, vector_len);
- evmovdqul(dst, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, nds, Assembler::AVX_512bit);
- }
+ assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+ Assembler::vpsrlw(dst, nds, shift, vector_len); // integer-count form: only dst and nds are range-checked by the assert above
}
void MacroAssembler::vpsllw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
- int dst_enc = dst->encoding();
- int nds_enc = nds->encoding();
- int shift_enc = shift->encoding();
- if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
- Assembler::vpsllw(dst, nds, shift, vector_len);
- } else if ((dst_enc < 16) && (shift_enc < 16)) {
- Assembler::vpsllw(dst, dst, shift, vector_len);
- } else if ((dst_enc < 16) && (nds_enc < 16)) {
- // use nds as scratch with shift
- evmovdqul(nds, shift, Assembler::AVX_512bit);
- Assembler::vpsllw(dst, dst, nds, vector_len);
- } else if ((shift_enc < 16) && (nds_enc < 16)) {
- // use nds as scratch with dst
- evmovdqul(nds, dst, Assembler::AVX_512bit);
- Assembler::vpsllw(nds, nds, shift, vector_len);
- evmovdqul(dst, nds, Assembler::AVX_512bit);
- } else if (dst_enc < 16) {
- // use nds to save a copy of xmm0 and hold shift
- evmovdqul(nds, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, shift, Assembler::AVX_512bit);
- Assembler::vpsllw(dst, dst, xmm0, vector_len);
- evmovdqul(xmm0, nds, Assembler::AVX_512bit);
- } else if (nds_enc < 16) {
- // use nds and dst as temps
- evmovdqul(nds, dst, Assembler::AVX_512bit);
- evmovdqul(dst, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, shift, Assembler::AVX_512bit);
- Assembler::vpsllw(nds, nds, xmm0, vector_len);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- evmovdqul(dst, nds, Assembler::AVX_512bit);
- } else {
- // worse case scenario, all regs are in the upper bank
- push_zmm(xmm1);
- evmovdqul(nds, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm1, shift, Assembler::AVX_512bit);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::vpsllw(xmm0, xmm0, xmm1, vector_len);
- evmovdqul(dst, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, nds, Assembler::AVX_512bit);
- pop_zmm(xmm1);
- }
+ assert(((dst->encoding() < 16 && shift->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+ Assembler::vpsllw(dst, nds, shift, vector_len); // register-count form: the shift register is range-checked together with dst and nds
}
void MacroAssembler::vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
- int dst_enc = dst->encoding();
- int nds_enc = nds->encoding();
- if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
- Assembler::vpsllw(dst, nds, shift, vector_len);
- } else if (dst_enc < 16) {
- Assembler::vpsllw(dst, dst, shift, vector_len);
- } else if (nds_enc < 16) {
- // use nds as scratch
- evmovdqul(nds, dst, Assembler::AVX_512bit);
- Assembler::vpsllw(nds, nds, shift, vector_len);
- evmovdqul(dst, nds, Assembler::AVX_512bit);
- } else {
- // use nds as scratch for xmm0
- evmovdqul(nds, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::vpsllw(xmm0, xmm0, shift, vector_len);
- evmovdqul(dst, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, nds, Assembler::AVX_512bit);
- }
+ assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+ Assembler::vpsllw(dst, nds, shift, vector_len); // integer-count form: only dst and nds are range-checked by the assert above
}
void MacroAssembler::vptest(XMMRegister dst, XMMRegister src) {
- int dst_enc = dst->encoding();
- int src_enc = src->encoding();
- if ((dst_enc < 16) && (src_enc < 16)) {
- Assembler::vptest(dst, src);
- } else if (src_enc < 16) {
- push_zmm(xmm0);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::vptest(xmm0, src);
- pop_zmm(xmm0);
- } else if (dst_enc < 16) {
- push_zmm(xmm0);
- evmovdqul(xmm0, src, Assembler::AVX_512bit);
- Assembler::vptest(dst, xmm0);
- pop_zmm(xmm0);
- } else {
- push_zmm(xmm0);
- push_zmm(xmm1);
- movdqu(xmm0, src);
- movdqu(xmm1, dst);
- Assembler::vptest(xmm1, xmm0);
- pop_zmm(xmm1);
- pop_zmm(xmm0);
- }
-}
-
-// This instruction exists within macros, ergo we cannot control its input
-// when emitted through those patterns.
+ assert((dst->encoding() < 16 && src->encoding() < 16),"XMM register should be 0-15");
+ Assembler::vptest(dst, src); // the assert above has no VM_Version alternative: both operands must always encode below 16
+}
+
void MacroAssembler::punpcklbw(XMMRegister dst, XMMRegister src) {
- if (VM_Version::supports_avx512nobw()) {
- int dst_enc = dst->encoding();
- int src_enc = src->encoding();
- if (dst_enc == src_enc) {
- if (dst_enc < 16) {
- Assembler::punpcklbw(dst, src);
- } else {
- push_zmm(xmm0);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::punpcklbw(xmm0, xmm0);
- evmovdqul(dst, xmm0, Assembler::AVX_512bit);
- pop_zmm(xmm0);
- }
- } else {
- if ((src_enc < 16) && (dst_enc < 16)) {
- Assembler::punpcklbw(dst, src);
- } else if (src_enc < 16) {
- push_zmm(xmm0);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::punpcklbw(xmm0, src);
- evmovdqul(dst, xmm0, Assembler::AVX_512bit);
- pop_zmm(xmm0);
- } else if (dst_enc < 16) {
- push_zmm(xmm0);
- evmovdqul(xmm0, src, Assembler::AVX_512bit);
- Assembler::punpcklbw(dst, xmm0);
- pop_zmm(xmm0);
- } else {
- push_zmm(xmm0);
- push_zmm(xmm1);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- evmovdqul(xmm1, src, Assembler::AVX_512bit);
- Assembler::punpcklbw(xmm0, xmm1);
- evmovdqul(dst, xmm0, Assembler::AVX_512bit);
- pop_zmm(xmm1);
- pop_zmm(xmm0);
- }
- }
- } else {
- Assembler::punpcklbw(dst, src);
- }
+ assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+ Assembler::punpcklbw(dst, src); // operands are passed straight through; nothing is staged in xmm0/xmm1 here
}
void MacroAssembler::pshufd(XMMRegister dst, Address src, int mode) {
- if (VM_Version::supports_avx512vl()) {
- Assembler::pshufd(dst, src, mode);
- } else {
- int dst_enc = dst->encoding();
- if (dst_enc < 16) {
- Assembler::pshufd(dst, src, mode);
- } else {
- push_zmm(xmm0);
- Assembler::pshufd(xmm0, src, mode);
- evmovdqul(dst, xmm0, Assembler::AVX_512bit);
- pop_zmm(xmm0);
- }
- }
-}
-
-// This instruction exists within macros, ergo we cannot control its input
-// when emitted through those patterns.
+ assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
+ Assembler::pshufd(dst, src, mode); // per the assert above, dst may encode >= 16 only when supports_avx512vl() holds
+}
+
void MacroAssembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
- if (VM_Version::supports_avx512nobw()) {
- int dst_enc = dst->encoding();
- int src_enc = src->encoding();
- if (dst_enc == src_enc) {
- if (dst_enc < 16) {
- Assembler::pshuflw(dst, src, mode);
- } else {
- push_zmm(xmm0);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::pshuflw(xmm0, xmm0, mode);
- evmovdqul(dst, xmm0, Assembler::AVX_512bit);
- pop_zmm(xmm0);
- }
- } else {
- if ((src_enc < 16) && (dst_enc < 16)) {
- Assembler::pshuflw(dst, src, mode);
- } else if (src_enc < 16) {
- push_zmm(xmm0);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::pshuflw(xmm0, src, mode);
- evmovdqul(dst, xmm0, Assembler::AVX_512bit);
- pop_zmm(xmm0);
- } else if (dst_enc < 16) {
- push_zmm(xmm0);
- evmovdqul(xmm0, src, Assembler::AVX_512bit);
- Assembler::pshuflw(dst, xmm0, mode);
- pop_zmm(xmm0);
- } else {
- push_zmm(xmm0);
- push_zmm(xmm1);
- evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- evmovdqul(xmm1, src, Assembler::AVX_512bit);
- Assembler::pshuflw(xmm0, xmm1, mode);
- evmovdqul(dst, xmm0, Assembler::AVX_512bit);
- pop_zmm(xmm1);
- pop_zmm(xmm0);
- }
- }
- } else {
- Assembler::pshuflw(dst, src, mode);
- }
+ assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+ Assembler::pshuflw(dst, src, mode); // operands are passed straight through; nothing is staged in xmm0/xmm1 here
}
void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
@@ -4874,47 +4022,13 @@
}
void MacroAssembler::vnegatess(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
- int nds_enc = nds->encoding();
- int dst_enc = dst->encoding();
- bool dst_upper_bank = (dst_enc > 15);
- bool nds_upper_bank = (nds_enc > 15);
- if (VM_Version::supports_avx512novl() &&
- (nds_upper_bank || dst_upper_bank)) {
- if (dst_upper_bank) {
- push_zmm(xmm0);
- movflt(xmm0, nds);
- vxorps(xmm0, xmm0, src, Assembler::AVX_128bit);
- movflt(dst, xmm0);
- pop_zmm(xmm0);
- } else {
- movflt(dst, nds);
- vxorps(dst, dst, src, Assembler::AVX_128bit);
- }
- } else {
- vxorps(dst, nds, src, Assembler::AVX_128bit);
- }
+ assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq()),"XMM register should be 0-15");
+ vxorps(dst, nds, src, Assembler::AVX_128bit); // delegates to vxorps at 128-bit length; the assert above tests supports_avx512vldq()
}
void MacroAssembler::vnegatesd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
- int nds_enc = nds->encoding();
- int dst_enc = dst->encoding();
- bool dst_upper_bank = (dst_enc > 15);
- bool nds_upper_bank = (nds_enc > 15);
- if (VM_Version::supports_avx512novl() &&
- (nds_upper_bank || dst_upper_bank)) {
- if (dst_upper_bank) {
- push_zmm(xmm0);
- movdbl(xmm0, nds);
- vxorpd(xmm0, xmm0, src, Assembler::AVX_128bit);
- movdbl(dst, xmm0);
- pop_zmm(xmm0);
- } else {
- movdbl(dst, nds);
- vxorpd(dst, dst, src, Assembler::AVX_128bit);
- }
- } else {
- vxorpd(dst, nds, src, Assembler::AVX_128bit);
- }
+ assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq()),"XMM register should be 0-15");
+ vxorpd(dst, nds, src, Assembler::AVX_128bit); // delegates to vxorpd at 128-bit length; the assert above tests supports_avx512vldq()
}
void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
@@ -7064,7 +6178,7 @@
cmpl(cnt1, 2*stride);
jcc(Assembler::less, SCAN_TO_8_CHAR_INIT);
movdl(vec1, ch);
- vpbroadcastw(vec1, vec1);
+ vpbroadcastw(vec1, vec1, Assembler::AVX_256bit);
vpxor(vec2, vec2);
movl(tmp, cnt1);
andl(tmp, 0xFFFFFFF0); //vector count (in chars)
@@ -7659,7 +6773,7 @@
movl(tmp1, 0x80808080); // create mask to test for Unicode chars in vector
movdl(vec2, tmp1);
- vpbroadcastd(vec2, vec2);
+ vpbroadcastd(vec2, vec2, Assembler::AVX_256bit);
bind(COMPARE_WIDE_VECTORS);
vmovdqu(vec1, Address(ary1, len, Address::times_1));
@@ -8091,7 +7205,7 @@
if (UseAVX > 2 && UseUnalignedLoadStores) {
// Fill 64-byte chunks
Label L_fill_64_bytes_loop, L_check_fill_32_bytes;
- evpbroadcastd(xtmp, xtmp, Assembler::AVX_512bit);
+ vpbroadcastd(xtmp, xtmp, Assembler::AVX_512bit);
subl(count, 16 << shift);
jcc(Assembler::less, L_check_fill_32_bytes);
@@ -8114,7 +7228,7 @@
} else if (UseAVX == 2 && UseUnalignedLoadStores) {
// Fill 64-byte chunks
Label L_fill_64_bytes_loop, L_check_fill_32_bytes;
- vpbroadcastd(xtmp, xtmp);
+ vpbroadcastd(xtmp, xtmp, Assembler::AVX_256bit);
subl(count, 16 << shift);
jcc(Assembler::less, L_check_fill_32_bytes);
@@ -8256,7 +7370,7 @@
Label L_chars_32_check, L_copy_32_chars, L_copy_32_chars_exit;
movl(tmp5, 0xff00ff00); // create mask to test for Unicode chars in vector
movdl(tmp1Reg, tmp5);
- vpbroadcastd(tmp1Reg, tmp1Reg);
+ vpbroadcastd(tmp1Reg, tmp1Reg, Assembler::AVX_256bit);
jmp(L_chars_32_check);
bind(L_copy_32_chars);
--- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp Mon Sep 24 13:51:22 2018 -0700
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp Mon Sep 24 16:37:28 2018 -0700
@@ -482,10 +482,6 @@
// from register xmm0. Otherwise, the value is stored from the FPU stack.
void store_double(Address dst);
- // Save/restore ZMM (512bit) register on stack.
- void push_zmm(XMMRegister reg);
- void pop_zmm(XMMRegister reg);
-
// pushes double TOS element of FPU stack on CPU stack; pops from FPU stack
void push_fTOS();
@@ -1214,9 +1210,11 @@
void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); }
void vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len);
- void vpbroadcastw(XMMRegister dst, XMMRegister src);
+ void vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpbroadcastw(XMMRegister dst, Address src, int vector_len) { Assembler::vpbroadcastw(dst, src, vector_len); }
void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+
void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpmovzxbw(XMMRegister dst, Address src, int vector_len);
--- a/src/hotspot/cpu/x86/vm_version_x86.cpp Mon Sep 24 13:51:22 2018 -0700
+++ b/src/hotspot/cpu/x86/vm_version_x86.cpp Mon Sep 24 16:37:28 2018 -0700
@@ -403,7 +403,7 @@
__ movdl(xmm0, rcx);
__ movl(rcx, 0xffff);
__ kmovwl(k1, rcx);
- __ evpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
+ __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
__ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
#ifdef _LP64
__ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
--- a/src/hotspot/cpu/x86/vm_version_x86.hpp Mon Sep 24 13:51:22 2018 -0700
+++ b/src/hotspot/cpu/x86/vm_version_x86.hpp Mon Sep 24 16:37:28 2018 -0700
@@ -816,7 +816,10 @@
static bool supports_avx512cd() { return (_features & CPU_AVX512CD) != 0; }
static bool supports_avx512bw() { return (_features & CPU_AVX512BW) != 0; }
static bool supports_avx512vl() { return (_features & CPU_AVX512VL) != 0; }
- static bool supports_avx512vlbw() { return (supports_avx512bw() && supports_avx512vl()); }
+ static bool supports_avx512vlbw() { return (supports_evex() && supports_avx512bw() && supports_avx512vl()); }
+ static bool supports_avx512vldq() { return (supports_evex() && supports_avx512dq() && supports_avx512vl()); }
+ static bool supports_avx512vlbwdq() { return (supports_evex() && supports_avx512vl() &&
+ supports_avx512bw() && supports_avx512dq()); }
static bool supports_avx512novl() { return (supports_evex() && !supports_avx512vl()); }
static bool supports_avx512nobw() { return (supports_evex() && !supports_avx512bw()); }
static bool supports_avx256only() { return (supports_avx2() && !supports_evex()); }
--- a/src/hotspot/cpu/x86/x86.ad Mon Sep 24 13:51:22 2018 -0700
+++ b/src/hotspot/cpu/x86/x86.ad Mon Sep 24 16:37:28 2018 -0700
@@ -729,6 +729,7 @@
);
reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
+reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
// Class for pre evex double registers
reg_class double_reg_legacy(XMM0, XMM0b,
@@ -789,6 +790,7 @@
);
reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
+reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
// Class for pre evex 32bit vector registers
reg_class vectors_reg_legacy(XMM0,
@@ -849,6 +851,7 @@
);
reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
+reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
// Class for all 64bit vector registers
reg_class vectord_reg_legacy(XMM0, XMM0b,
@@ -909,6 +912,7 @@
);
reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
+reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
// Class for all 128bit vector registers
reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
@@ -969,6 +973,7 @@
);
reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
+reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
// Class for all 256bit vector registers
reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
@@ -1029,9 +1034,10 @@
);
reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
+reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
// Class for all 512bit vector registers
-reg_class vectorz_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
+reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
@@ -1067,6 +1073,30 @@
#endif
);
+// Class for restricted 512bit vector registers (XMM0-XMM7, plus XMM8-XMM15 under _LP64)
+reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
+ XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
+ XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
+ XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
+ XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
+ XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
+ XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
+ XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
+#ifdef _LP64
+ ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
+ XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
+ XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
+ XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
+ XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
+ XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
+ XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
+ XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
+#endif
+ );
+
+reg_class_dynamic vectorz_reg(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
+reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
+
reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
reg_class ymm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h);
reg_class zmm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p);
@@ -1487,6 +1517,8 @@
// AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
size = (UseAVX > 2) ? 64 : 32;
+ if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
+ size = (VM_Version::supports_avx512bw()) ? 64 : 32;
// Use flag to limit vector size.
size = MIN2(size,(int)MaxVectorSize);
// Minimum 2 values in vector (or 4 for bytes).
@@ -1528,7 +1560,7 @@
return MIN2(size,max_size);
}
-// Vector ideal reg corresponding to specidied size in bytes
+// Vector ideal reg corresponding to specified size in bytes
const uint Matcher::vector_ideal_reg(int size) {
assert(MaxVectorSize >= size, "");
switch(size) {
@@ -1648,10 +1680,28 @@
case Op_VecS: // copy whole register
case Op_VecD:
case Op_VecX:
+#ifndef LP64 // NOTE(review): other guards in this file test _LP64; confirm LP64 is defined by the build, otherwise the #else arm is never compiled
__ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
+#else
+ if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
+ __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
+ } else { // UseAVX >= 3 and no AVX512VL
+ __ vpxor(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[dst_lo]), 2);
+ __ vinserti32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
+ }
+#endif
break;
case Op_VecY:
+#ifndef LP64 // NOTE(review): same LP64 vs _LP64 question as for Op_VecX above
__ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
+#else
+ if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
+ __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
+ } else { // UseAVX >= 3 and no AVX512VL
+ __ vpxor(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[dst_lo]), 2);
+ __ vinserti64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
+ }
+#endif
break;
case Op_VecZ:
__ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
@@ -1703,10 +1753,28 @@
__ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
break;
case Op_VecX:
+#ifndef LP64 // NOTE(review): other guards in this file test _LP64; confirm LP64 is defined by the build, otherwise the #else arm is never compiled
__ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
+#else
+ if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
+ __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
+ } else { // UseAVX >= 3 and no AVX512VL
+ __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
+ __ vinserti32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
+ }
+#endif
break;
case Op_VecY:
+#ifndef LP64 // NOTE(review): same LP64 vs _LP64 question as for Op_VecX above
__ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
+#else
+ if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
+ __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
+ } else { // UseAVX >= 3 and no AVX512VL
+ __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
+ __ vinserti64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
+ }
+#endif
break;
case Op_VecZ:
__ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
@@ -1723,10 +1791,28 @@
__ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
break;
case Op_VecX:
+#ifndef LP64 // NOTE(review): other guards in this file test _LP64; confirm LP64 is defined by the build, otherwise the #else arm is never compiled
__ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
+#else
+ if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
+ __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
+ }
+ else { // UseAVX >= 3 and no AVX512VL
+ __ vextracti32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
+ }
+#endif
break;
case Op_VecY:
+#ifndef LP64 // NOTE(review): same LP64 vs _LP64 question as for Op_VecX above
__ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
+#else
+ if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
+ __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
+ }
+ else { // UseAVX >= 3 and no AVX512VL
+ __ vextracti64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
+ }
+#endif
break;
case Op_VecZ:
__ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
@@ -1908,7 +1994,6 @@
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.
-// This one generically applies only for evex, so only one version
operand vecZ() %{
constraint(ALLOC_IN_RC(vectorz_reg));
match(VecZ);
@@ -1917,6 +2002,14 @@
interface(REG_INTER);
%}
+operand legVecZ() %{
+ constraint(ALLOC_IN_RC(vectorz_reg_vl)); // NOTE(review): vectorz_reg_vl appears to fall back to the legacy XMM set unless EVEX and AVX512VL are both supported - confirm
+ match(VecZ);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
// Comparison Code for FP conditional move
operand cmpOp_vcmppd() %{
match(Bool);
@@ -2547,22 +2640,8 @@
ins_pipe(pipe_slow);
%}
-instruct absF_reg_reg(regF dst, regF src) %{
- predicate(VM_Version::supports_avxonly());
- match(Set dst (AbsF src));
- ins_cost(150);
- format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
- ins_encode %{
- int vector_len = 0;
- __ vandps($dst$$XMMRegister, $src$$XMMRegister,
- ExternalAddress(float_signmask()), vector_len);
- %}
- ins_pipe(pipe_slow);
-%}
-
-#ifdef _LP64
-instruct absF_reg_reg_evex(regF dst, regF src) %{
- predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
+instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
+ predicate(UseAVX > 0);
match(Set dst (AbsF src));
ins_cost(150);
format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
@@ -2574,34 +2653,6 @@
ins_pipe(pipe_slow);
%}
-instruct absF_reg_reg_evex_special(regF dst, regF src1, regF src2) %{
- predicate(VM_Version::supports_avx512novl());
- match(Set dst (AbsF src1));
- effect(TEMP src2);
- ins_cost(150);
- format %{ "vabsss $dst, $src1, $src2, [0x7fffffff]\t# abs float by sign masking" %}
- ins_encode %{
- int vector_len = 0;
- __ vabsss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
- ExternalAddress(float_signmask()), vector_len);
- %}
- ins_pipe(pipe_slow);
-%}
-#else // _LP64
-instruct absF_reg_reg_evex(regF dst, regF src) %{
- predicate(UseAVX > 2);
- match(Set dst (AbsF src));
- ins_cost(150);
- format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
- ins_encode %{
- int vector_len = 0;
- __ vandps($dst$$XMMRegister, $src$$XMMRegister,
- ExternalAddress(float_signmask()), vector_len);
- %}
- ins_pipe(pipe_slow);
-%}
-#endif
-
instruct absD_reg(regD dst) %{
predicate((UseSSE>=2) && (UseAVX == 0));
match(Set dst (AbsD dst));
@@ -2614,23 +2665,8 @@
ins_pipe(pipe_slow);
%}
-instruct absD_reg_reg(regD dst, regD src) %{
- predicate(VM_Version::supports_avxonly());
- match(Set dst (AbsD src));
- ins_cost(150);
- format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
- "# abs double by sign masking" %}
- ins_encode %{
- int vector_len = 0;
- __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
- ExternalAddress(double_signmask()), vector_len);
- %}
- ins_pipe(pipe_slow);
-%}
-
-#ifdef _LP64
-instruct absD_reg_reg_evex(regD dst, regD src) %{
- predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
+instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
+ predicate(UseAVX > 0);
match(Set dst (AbsD src));
ins_cost(150);
format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
@@ -2643,35 +2679,6 @@
ins_pipe(pipe_slow);
%}
-instruct absD_reg_reg_evex_special(regD dst, regD src1, regD src2) %{
- predicate(VM_Version::supports_avx512novl());
- match(Set dst (AbsD src1));
- effect(TEMP src2);
- ins_cost(150);
- format %{ "vabssd $dst, $src1, $src2, [0x7fffffffffffffff]\t# abs float by sign masking" %}
- ins_encode %{
- int vector_len = 0;
- __ vabssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
- ExternalAddress(double_signmask()), vector_len);
- %}
- ins_pipe(pipe_slow);
-%}
-#else // _LP64
-instruct absD_reg_reg_evex(regD dst, regD src) %{
- predicate(UseAVX > 2);
- match(Set dst (AbsD src));
- ins_cost(150);
- format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
- "# abs double by sign masking" %}
- ins_encode %{
- int vector_len = 0;
- __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
- ExternalAddress(double_signmask()), vector_len);
- %}
- ins_pipe(pipe_slow);
-%}
-#endif
-
instruct negF_reg(regF dst) %{
predicate((UseSSE>=1) && (UseAVX == 0));
match(Set dst (NegF dst));
@@ -2683,7 +2690,7 @@
ins_pipe(pipe_slow);
%}
-instruct negF_reg_reg(regF dst, regF src) %{
+instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
predicate(UseAVX > 0);
match(Set dst (NegF src));
ins_cost(150);
@@ -2707,11 +2714,11 @@
ins_pipe(pipe_slow);
%}
-instruct negD_reg_reg(regD dst, regD src) %{
+instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
predicate(UseAVX > 0);
match(Set dst (NegD src));
ins_cost(150);
- format %{ "vnegatess $dst, $src, [0x8000000000000000]\t"
+ format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
"# neg double by sign flipping" %}
ins_encode %{
__ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
@@ -2835,6 +2842,7 @@
// ====================VECTOR INSTRUCTIONS=====================================
+
// Load vectors (4 bytes long)
instruct loadV4(vecS dst, memory mem) %{
predicate(n->as_LoadVector()->memory_size() == 4);
@@ -2847,6 +2855,26 @@
ins_pipe( pipe_slow );
%}
+// Move vector (4 bytes long) from a vecS register into the legVecS register class
+instruct MoveVecS2Leg(legVecS dst, vecS src) %{
+  match(Set dst src);
+  format %{ "movss $dst,$src\t! move vector (4 bytes)" %}
+  ins_encode %{
+    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+// Move vector (4 bytes long) from a legVecS register into the vecS register class
+instruct MoveLeg2VecS(vecS dst, legVecS src) %{
+  match(Set dst src);
+  format %{ "movss $dst,$src\t! move vector (4 bytes)" %}
+  ins_encode %{
+    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
// Load vectors (8 bytes long)
instruct loadV8(vecD dst, memory mem) %{
predicate(n->as_LoadVector()->memory_size() == 8);
@@ -2859,6 +2887,26 @@
ins_pipe( pipe_slow );
%}
+// Move vector (8 bytes long) from a vecD register into the legVecD register class
+instruct MoveVecD2Leg(legVecD dst, vecD src) %{
+  match(Set dst src);
+  format %{ "movsd $dst,$src\t! move vector (8 bytes)" %}
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+// Move vector (8 bytes long) from a legVecD register into the vecD register class
+instruct MoveLeg2VecD(vecD dst, legVecD src) %{
+  match(Set dst src);
+  format %{ "movsd $dst,$src\t! move vector (8 bytes)" %}
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
// Load vectors (16 bytes long)
instruct loadV16(vecX dst, memory mem) %{
predicate(n->as_LoadVector()->memory_size() == 16);
@@ -2871,6 +2919,36 @@
ins_pipe( pipe_slow );
%}
+// Move vector (16 bytes long) from a vecX register into the legVecX register class
+instruct MoveVecX2Leg(legVecX dst, vecX src) %{
+  match(Set dst src);
+  format %{ "movdqu $dst,$src\t! move vector (16 bytes)" %}
+  ins_encode %{
+    if (UseAVX < 3 || VM_Version::supports_avx512vl()) { // UseAVX == 2 has no EVEX and must stay on movdqu
+      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
+    } else { // UseAVX >= 3 without AVX512VL: full-width EVEX move
+      int vector_len = 2;
+      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+    }
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+// Move vector (16 bytes long) from a legVecX register into the vecX register class
+instruct MoveLeg2VecX(vecX dst, legVecX src) %{
+  match(Set dst src);
+  format %{ "movdqu $dst,$src\t! move vector (16 bytes)" %}
+  ins_encode %{
+    if (UseAVX < 3 || VM_Version::supports_avx512vl()) { // UseAVX == 2 has no EVEX and must stay on movdqu
+      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
+    } else { // UseAVX >= 3 without AVX512VL: full-width EVEX move
+      int vector_len = 2;
+      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+    }
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
// Load vectors (32 bytes long)
instruct loadV32(vecY dst, memory mem) %{
predicate(n->as_LoadVector()->memory_size() == 32);
@@ -2883,6 +2961,36 @@
ins_pipe( pipe_slow );
%}
+// Move vector (32 bytes long) from a vecY register into the legVecY register class
+instruct MoveVecY2Leg(legVecY dst, vecY src) %{
+  match(Set dst src);
+  format %{ "vmovdqu $dst,$src\t! move vector (32 bytes)" %}
+  ins_encode %{
+    if (UseAVX < 3 || VM_Version::supports_avx512vl()) { // UseAVX == 2 has no EVEX and must stay on vmovdqu
+      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
+    } else { // UseAVX >= 3 without AVX512VL: full-width EVEX move
+      int vector_len = 2;
+      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+    }
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+// Move vector (32 bytes long) from a legVecY register into the vecY register class
+instruct MoveLeg2VecY(vecY dst, legVecY src) %{
+  match(Set dst src);
+  format %{ "vmovdqu $dst,$src\t! move vector (32 bytes)" %}
+  ins_encode %{
+    if (UseAVX < 3 || VM_Version::supports_avx512vl()) { // UseAVX == 2 has no EVEX and must stay on vmovdqu
+      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
+    } else { // UseAVX >= 3 without AVX512VL: full-width EVEX move
+      int vector_len = 2;
+      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+    }
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
// Load vectors (64 bytes long)
instruct loadV64_dword(vecZ dst, memory mem) %{
predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() <= 4);
@@ -2909,6 +3017,26 @@
ins_pipe( pipe_slow );
%}
+instruct MoveVecZ2Leg(legVecZ dst, vecZ src) %{
+  match(Set dst src);
+  format %{ "vmovdquq $dst k0,$src\t! Move vector (64 bytes)" %}
+  ins_encode %{
+    // Whole-register (512-bit) copy from the vecZ class into the legVecZ class.
+    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_512bit);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct MoveLeg2VecZ(vecZ dst, legVecZ src) %{
+  match(Set dst src);
+  format %{ "vmovdquq $dst k0,$src\t! Move vector (64 bytes)" %}
+  ins_encode %{
+    // Whole-register (512-bit) copy from the legVecZ class into the vecZ class.
+    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_512bit);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
// Store vectors
instruct storeV4(memory mem, vecS src) %{
predicate(n->as_StoreVector()->memory_size() == 4);
@@ -3068,6 +3196,44 @@
ins_pipe( pipe_slow );
%}
+instruct Repl64B(legVecZ dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw()); // 64-byte ReplicateB when AVX512VL+BW is not available
+  match(Set dst (ReplicateB src));
+  format %{ "movd $dst,$src\n\t"
+            "punpcklbw $dst,$dst\n\t"
+            "pshuflw $dst,$dst,0x00\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128_high $dst,$dst\n\t"
+            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate64B" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); // copy the low 256 bits into bits 511:256
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl64B_mem(legVecZ dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw()); // 64-byte ReplicateB from memory when AVX512VL+BW is not available
+  match(Set dst (ReplicateB (LoadB mem)));
+  format %{ "punpcklbw $dst,$mem\n\t"
+            "pshuflw $dst,$dst,0x00\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128_high $dst,$dst\n\t"
+            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate64B" %}
+  ins_encode %{
+    __ punpcklbw($dst$$XMMRegister, $mem$$Address);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); // copy the low 256 bits into bits 511:256
+  %}
+  ins_pipe( pipe_slow );
+%}
+
instruct Repl16B_imm(vecX dst, immI con) %{
predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateB con));
@@ -3094,6 +3260,22 @@
ins_pipe( pipe_slow );
%}
+instruct Repl64B_imm(legVecZ dst, immI con) %{
+  predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw()); // 64-byte ReplicateB of a constant when AVX512VL+BW is not available
+  match(Set dst (ReplicateB con));
+  format %{ "movq $dst,[$constantaddress]\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128_high $dst,$dst\n\t"
+            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate64B($con)" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); // copy the low 256 bits into bits 511:256
+  %}
+  ins_pipe( pipe_slow );
+%}
+
instruct Repl4S(vecD dst, rRegI src) %{
predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateS src));
@@ -3198,6 +3380,56 @@
ins_pipe( pipe_slow );
%}
+instruct Repl32S(legVecZ dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); // 32-short ReplicateS when AVX512VL+BW is not available
+  match(Set dst (ReplicateS src));
+  format %{ "movd $dst,$src\n\t"
+            "pshuflw $dst,$dst,0x00\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128_high $dst,$dst\n\t"
+            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); // copy the low 256 bits into bits 511:256
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl32S_mem(legVecZ dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); // 32-short ReplicateS from memory when AVX512VL+BW is not available
+  match(Set dst (ReplicateS (LoadS mem)));
+  format %{ "pshuflw $dst,$mem,0x00\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128_high $dst,$dst\n\t"
+            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S" %}
+  ins_encode %{
+    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); // copy the low 256 bits into bits 511:256
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl32S_imm(legVecZ dst, immI con) %{
+  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); // 32-short ReplicateS of a constant when AVX512VL+BW is not available
+  match(Set dst (ReplicateS con));
+  format %{ "movq $dst,[$constantaddress]\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128_high $dst,$dst\n\t"
+            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S($con)" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); // copy the low 256 bits into bits 511:256
+  %}
+  ins_pipe( pipe_slow );
+%}
+
instruct Repl4I(vecX dst, rRegI src) %{
predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateI src));
@@ -3246,6 +3478,36 @@
ins_pipe( pipe_slow );
%}
+instruct Repl16I(legVecZ dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); // 16-int ReplicateI when AVX512VL is not available
+  match(Set dst (ReplicateI src));
+  format %{ "movd $dst,$src\n\t"
+            "pshufd $dst,$dst,0x00\n\t"
+            "vinserti128_high $dst,$dst\n\t"
+            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); // copy the low 256 bits into bits 511:256
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl16I_mem(legVecZ dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); // 16-int ReplicateI from memory when AVX512VL is not available
+  match(Set dst (ReplicateI (LoadI mem)));
+  format %{ "pshufd $dst,$mem,0x00\n\t"
+            "vinserti128_high $dst,$dst\n\t"
+            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I" %}
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); // copy the low 256 bits into bits 511:256
+  %}
+  ins_pipe( pipe_slow );
+%}
+
instruct Repl4I_imm(vecX dst, immI con) %{
predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateI con));
@@ -3272,6 +3534,22 @@
ins_pipe( pipe_slow );
%}
+instruct Repl16I_imm(legVecZ dst, immI con) %{
+  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); // 16-int ReplicateI of a constant when AVX512VL is not available
+  match(Set dst (ReplicateI con));
+  format %{ "movq $dst,[$constantaddress]\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128_high $dst,$dst\n\t"
+            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I($con)" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); // copy the low 256 bits into bits 511:256
+  %}
+  ins_pipe( pipe_slow );
+%}
+
// Long could be loaded into xmm register directly from memory.
instruct Repl2L_mem(vecX dst, memory mem) %{
predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw());
@@ -3300,8 +3578,24 @@
%}
ins_pipe( pipe_slow );
%}
+
+instruct Repl8L(legVecZ dst, rRegL src) %{
+  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); // 8-long ReplicateL when AVX512VL is not available
+  match(Set dst (ReplicateL src));
+  format %{ "movdq $dst,$src\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128_high $dst,$dst\n\t"
+            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %}
+  ins_encode %{
+    __ movdq($dst$$XMMRegister, $src$$Register);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); // copy the low 256 bits into bits 511:256
+  %}
+  ins_pipe( pipe_slow );
+%}
#else // _LP64
-instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
+instruct Repl4L(vecY dst, eRegL src, vecY tmp) %{
predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateL src));
effect(TEMP dst, USE src, TEMP tmp);
@@ -3319,6 +3613,27 @@
%}
ins_pipe( pipe_slow );
%}
+
+instruct Repl8L(legVecZ dst, eRegL src, legVecZ tmp) %{
+  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); // 8-long ReplicateL (32-bit build) when AVX512VL is not available
+  match(Set dst (ReplicateL src));
+  effect(TEMP dst, USE src, TEMP tmp);
+  format %{ "movdl $dst,$src.lo\n\t"
+            "movdl $tmp,$src.hi\n\t"
+            "punpckldq $dst,$tmp\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128_high $dst,$dst\n\t"
+            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
+    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); // combine the two 32-bit halves into one 64-bit lane
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); // copy the low 256 bits into bits 511:256
+  %}
+  ins_pipe( pipe_slow );
+%}
#endif // _LP64
instruct Repl4L_imm(vecY dst, immL con) %{
@@ -3335,6 +3650,22 @@
ins_pipe( pipe_slow );
%}
+instruct Repl8L_imm(legVecZ dst, immL con) %{
+  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); // 8-long ReplicateL of a constant when AVX512VL is not available
+  match(Set dst (ReplicateL con));
+  format %{ "movq $dst,[$constantaddress]\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128_high $dst,$dst\n\t"
+            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L($con)" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $constantaddress($con));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); // copy the low 256 bits into bits 511:256
+  %}
+  ins_pipe( pipe_slow );
+%}
+
instruct Repl4L_mem(vecY dst, memory mem) %{
predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateL (LoadL mem)));
@@ -3349,6 +3680,22 @@
ins_pipe( pipe_slow );
%}
+instruct Repl8L_mem(legVecZ dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); // 8-long ReplicateL from memory when AVX512VL is not available
+  match(Set dst (ReplicateL (LoadL mem)));
+  format %{ "movq $dst,$mem\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128_high $dst,$dst\n\t"
+            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); // copy the low 256 bits into bits 511:256
+  %}
+  ins_pipe( pipe_slow );
+%}
+
instruct Repl2F_mem(vecD dst, memory mem) %{
predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateF (LoadF mem)));
@@ -3369,8 +3716,8 @@
ins_pipe( pipe_slow );
%}
-instruct Repl8F(vecY dst, regF src) %{
- predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
+instruct Repl8F(vecY dst, vlRegF src) %{
+ predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateF src));
format %{ "pshufd $dst,$src,0x00\n\t"
"vinsertf128_high $dst,$dst\t! replicate8F" %}
@@ -3393,6 +3740,34 @@
ins_pipe( pipe_slow );
%}
+instruct Repl16F(legVecZ dst, vlRegF src) %{
+  predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vl()); // 16-float ReplicateF when AVX512VL is not available
+  match(Set dst (ReplicateF src));
+  format %{ "pshufd $dst,$src,0x00\n\t"
+            "vinsertf128_high $dst,$dst\n\t"
+            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16F" %}
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); // copy the low 256 bits into bits 511:256
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl16F_mem(legVecZ dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); // 16-float ReplicateF from memory when AVX512VL is not available
+  match(Set dst (ReplicateF (LoadF mem)));
+  format %{ "pshufd $dst,$mem,0x00\n\t"
+            "vinsertf128_high $dst,$dst\n\t"
+            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16F" %}
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
+    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); // copy the low 256 bits into bits 511:256
+  %}
+  ins_pipe( pipe_slow );
+%}
+
instruct Repl2F_zero(vecD dst, immF0 zero) %{
predicate(n->as_Vector()->length() == 2 && UseAVX < 3);
match(Set dst (ReplicateF zero));
@@ -3434,8 +3809,8 @@
ins_pipe( pipe_slow );
%}
-instruct Repl4D(vecY dst, regD src) %{
- predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
+instruct Repl4D(vecY dst, vlRegD src) %{
+ predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateD src));
format %{ "pshufd $dst,$src,0x44\n\t"
"vinsertf128_high $dst,$dst\t! replicate4D" %}
@@ -3458,6 +3833,34 @@
ins_pipe( pipe_slow );
%}
+instruct Repl8D(legVecZ dst, vlRegD src) %{
+ predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl()); // register-source replicate of 8 doubles, used when supports_avx512vl() is false
+ match(Set dst (ReplicateD src));
+ format %{ "pshufd $dst,$src,0x44\n\t"
+ "vinsertf128_high $dst,$dst\n\t"
+ "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8D" %}
+ ins_encode %{
+ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); // mask 0x44 selects dwords 0,1,0,1: the low 64 bits of $src fill both halves of $dst
+ __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); // presumably copies the low 128 bits into bits 255:128 - confirm against the MacroAssembler helper
+ __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); // imm 0x1: low 256 bits of $dst are inserted as its upper 256 bits
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct Repl8D_mem(legVecZ dst, memory mem) %{
+ predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl()); // memory-source replicate of 8 doubles; same UseAVX > 0 gate as Repl8D
+ match(Set dst (ReplicateD (LoadD mem)));
+ format %{ "pshufd $dst,$mem,0x44\n\t"
+ "vinsertf128_high $dst,$dst\n\t"
+ "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8D" %}
+ ins_encode %{
+ __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); // mask 0x44 selects dwords 0,1,0,1: the low 64 bits read from $mem fill both halves of $dst
+ __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); // presumably copies the low 128 bits into bits 255:128 - confirm against the MacroAssembler helper
+ __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); // imm 0x1: low 256 bits of $dst are inserted as its upper 256 bits
+ %}
+ ins_pipe( pipe_slow );
+%}
+
// Replicate double (8 byte) scalar zero to be vector
instruct Repl2D_zero(vecX dst, immD0 zero) %{
predicate(n->as_Vector()->length() == 2 && UseAVX < 3);
@@ -3736,7 +4139,7 @@
ins_pipe( pipe_slow );
%}
#else // _LP64
-instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
+instruct Repl2L(vecX dst, eRegL src, vecX tmp) %{
predicate(n->as_Vector()->length() == 2);
match(Set dst (ReplicateL src));
effect(TEMP dst, USE src, TEMP tmp);
@@ -3791,7 +4194,7 @@
%}
// Replicate float (4 byte) scalar to be vector
-instruct Repl2F(vecD dst, regF src) %{
+instruct Repl2F(vecD dst, vlRegF src) %{
predicate(n->as_Vector()->length() == 2);
match(Set dst (ReplicateF src));
format %{ "pshufd $dst,$dst,0x00\t! replicate2F" %}
@@ -3801,7 +4204,7 @@
ins_pipe( fpu_reg_reg );
%}
-instruct Repl4F(vecX dst, regF src) %{
+instruct Repl4F(vecX dst, vlRegF src) %{
predicate(n->as_Vector()->length() == 4);
match(Set dst (ReplicateF src));
format %{ "pshufd $dst,$dst,0x00\t! replicate4F" %}
@@ -3812,7 +4215,7 @@
%}
// Replicate double (8 bytes) scalar to be vector
-instruct Repl2D(vecX dst, regD src) %{
+instruct Repl2D(vecX dst, vlRegD src) %{
predicate(n->as_Vector()->length() == 2);
match(Set dst (ReplicateD src));
format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
@@ -3825,31 +4228,31 @@
// ====================EVEX REPLICATE=============================================
instruct Repl4B_mem_evex(vecS dst, memory mem) %{
- predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
+ predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); // 4-element byte replicate from memory; needs UseAVX > 2 and supports_avx512vlbw()
match(Set dst (ReplicateB (LoadB mem)));
format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %}
ins_encode %{
int vector_len = 0;
- __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); // replicate the byte read from $mem into $dst
%}
ins_pipe( pipe_slow );
%}
instruct Repl8B_mem_evex(vecD dst, memory mem) %{
- predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
+ predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); // 8-element byte replicate from memory; needs UseAVX > 2 and supports_avx512vlbw()
match(Set dst (ReplicateB (LoadB mem)));
format %{ "vpbroadcastb $dst,$mem\t! replicate8B" %}
ins_encode %{
int vector_len = 0;
- __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); // replicate the byte read from $mem into $dst
%}
ins_pipe( pipe_slow );
%}
instruct Repl16B_evex(vecX dst, rRegI src) %{
- predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
+ predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateB src));
- format %{ "vpbroadcastb $dst,$src\t! replicate16B" %}
+ format %{ "evpbroadcastb $dst,$src\t! replicate16B" %}
ins_encode %{
int vector_len = 0;
__ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
@@ -3858,20 +4261,20 @@
%}
instruct Repl16B_mem_evex(vecX dst, memory mem) %{
- predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
+ predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); // 16-element byte replicate from memory; needs UseAVX > 2 and supports_avx512vlbw()
match(Set dst (ReplicateB (LoadB mem)));
format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %}
ins_encode %{
int vector_len = 0;
- __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); // replicate the byte read from $mem into $dst
%}
ins_pipe( pipe_slow );
%}
instruct Repl32B_evex(vecY dst, rRegI src) %{
- predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
+ predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateB src));
- format %{ "vpbroadcastb $dst,$src\t! replicate32B" %}
+ format %{ "evpbroadcastb $dst,$src\t! replicate32B" %}
ins_encode %{
int vector_len = 1;
__ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
@@ -3880,20 +4283,20 @@
%}
instruct Repl32B_mem_evex(vecY dst, memory mem) %{
- predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
+ predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); // 32-element byte replicate from memory; needs UseAVX > 2 and supports_avx512vlbw()
match(Set dst (ReplicateB (LoadB mem)));
format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %}
ins_encode %{
int vector_len = 1;
- __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); // replicate the byte read from $mem into $dst
%}
ins_pipe( pipe_slow );
%}
instruct Repl64B_evex(vecZ dst, rRegI src) %{
- predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
+ predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw());
match(Set dst (ReplicateB src));
- format %{ "vpbroadcastb $dst,$src\t! upper replicate64B" %}
+ format %{ "evpbroadcastb $dst,$src\t! upper replicate64B" %}
ins_encode %{
int vector_len = 2;
__ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
@@ -3902,51 +4305,51 @@
%}
instruct Repl64B_mem_evex(vecZ dst, memory mem) %{
- predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
+ predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw()); // 64-element byte replicate from memory; needs UseAVX > 2 and supports_avx512bw()
match(Set dst (ReplicateB (LoadB mem)));
format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %}
ins_encode %{
int vector_len = 2;
- __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); // replicate the byte read from $mem into $dst
%}
ins_pipe( pipe_slow );
%}
instruct Repl16B_imm_evex(vecX dst, immI con) %{
- predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
+ predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); // 16-element byte replicate of an immediate; needs UseAVX > 2 and supports_avx512vlbw()
match(Set dst (ReplicateB con));
format %{ "movq $dst,[$constantaddress]\n\t"
"vpbroadcastb $dst,$dst\t! replicate16B" %}
ins_encode %{
int vector_len = 0;
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
- __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); // replicate from $dst, which the movq above loaded from $constantaddress
%}
ins_pipe( pipe_slow );
%}
instruct Repl32B_imm_evex(vecY dst, immI con) %{
- predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
+ predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); // 32-element byte replicate of an immediate; needs UseAVX > 2 and supports_avx512vlbw()
match(Set dst (ReplicateB con));
format %{ "movq $dst,[$constantaddress]\n\t"
"vpbroadcastb $dst,$dst\t! replicate32B" %}
ins_encode %{
int vector_len = 1;
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
- __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); // replicate from $dst, which the movq above loaded from $constantaddress
%}
ins_pipe( pipe_slow );
%}
instruct Repl64B_imm_evex(vecZ dst, immI con) %{
- predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
+ predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw()); // 64-element byte replicate of an immediate; needs UseAVX > 2 and supports_avx512bw()
match(Set dst (ReplicateB con));
format %{ "movq $dst,[$constantaddress]\n\t"
"vpbroadcastb $dst,$dst\t! upper replicate64B" %}
ins_encode %{
int vector_len = 2;
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
- __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); // replicate from $dst, which the movq above loaded from $constantaddress
%}
ins_pipe( pipe_slow );
%}
@@ -3964,9 +4367,9 @@
%}
instruct Repl4S_evex(vecD dst, rRegI src) %{
- predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
+ predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateS src));
- format %{ "vpbroadcastw $dst,$src\t! replicate4S" %}
+ format %{ "evpbroadcastw $dst,$src\t! replicate4S" %}
ins_encode %{
int vector_len = 0;
__ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
@@ -3975,20 +4378,20 @@
%}
instruct Repl4S_mem_evex(vecD dst, memory mem) %{
- predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
+ predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); // 4-element short replicate from memory; needs UseAVX > 2 and supports_avx512vlbw()
match(Set dst (ReplicateS (LoadS mem)));
format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %}
ins_encode %{
int vector_len = 0;
- __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); // replicate the short read from $mem into $dst
%}
ins_pipe( pipe_slow );
%}
instruct Repl8S_evex(vecX dst, rRegI src) %{
- predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
+ predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateS src));
- format %{ "vpbroadcastw $dst,$src\t! replicate8S" %}
+ format %{ "evpbroadcastw $dst,$src\t! replicate8S" %}
ins_encode %{
int vector_len = 0;
__ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
@@ -3997,20 +4400,20 @@
%}
instruct Repl8S_mem_evex(vecX dst, memory mem) %{
- predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
+ predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); // 8-element short replicate from memory; needs UseAVX > 2 and supports_avx512vlbw()
match(Set dst (ReplicateS (LoadS mem)));
format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %}
ins_encode %{
int vector_len = 0;
- __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); // replicate the short read from $mem into $dst
%}
ins_pipe( pipe_slow );
%}
instruct Repl16S_evex(vecY dst, rRegI src) %{
- predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
+ predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
match(Set dst (ReplicateS src));
- format %{ "vpbroadcastw $dst,$src\t! replicate16S" %}
+ format %{ "evpbroadcastw $dst,$src\t! replicate16S" %}
ins_encode %{
int vector_len = 1;
__ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
@@ -4019,20 +4422,20 @@
%}
instruct Repl16S_mem_evex(vecY dst, memory mem) %{
- predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
+ predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); // 16-element short replicate from memory; needs UseAVX > 2 and supports_avx512vlbw()
match(Set dst (ReplicateS (LoadS mem)));
format %{ "vpbroadcastw $dst,$mem\t! replicate16S" %}
ins_encode %{
int vector_len = 1;
- __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); // replicate the short read from $mem into $dst
%}
ins_pipe( pipe_slow );
%}
instruct Repl32S_evex(vecZ dst, rRegI src) %{
- predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw());
+ predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw());
match(Set dst (ReplicateS src));
- format %{ "vpbroadcastw $dst,$src\t! replicate32S" %}
+ format %{ "evpbroadcastw $dst,$src\t! replicate32S" %}
ins_encode %{
int vector_len = 2;
__ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
@@ -4041,51 +4444,51 @@
%}
instruct Repl32S_mem_evex(vecZ dst, memory mem) %{
- predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw());
+ predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw()); // 32-element short replicate from memory; needs UseAVX > 2 and supports_avx512bw()
match(Set dst (ReplicateS (LoadS mem)));
format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %}
ins_encode %{
int vector_len = 2;
- __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); // replicate the short read from $mem into $dst
%}
ins_pipe( pipe_slow );
%}
instruct Repl8S_imm_evex(vecX dst, immI con) %{
- predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
+ predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); // 8-element short replicate of an immediate; needs UseAVX > 2 and supports_avx512vlbw()
match(Set dst (ReplicateS con));
format %{ "movq $dst,[$constantaddress]\n\t"
"vpbroadcastw $dst,$dst\t! replicate8S" %}
ins_encode %{
int vector_len = 0;
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
- __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); // replicate from $dst, which the movq above loaded from $constantaddress
%}
ins_pipe( pipe_slow );
%}
instruct Repl16S_imm_evex(vecY dst, immI con) %{
- predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
+ predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); // 16-element short replicate of an immediate; needs UseAVX > 2 and supports_avx512vlbw()
match(Set dst (ReplicateS con));
format %{ "movq $dst,[$constantaddress]\n\t"
"vpbroadcastw $dst,$dst\t! replicate16S" %}
ins_encode %{
int vector_len = 1;
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
- __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); // replicate from $dst, which the movq above loaded from $constantaddress
%}
ins_pipe( pipe_slow );
%}
instruct Repl32S_imm_evex(vecZ dst, immI con) %{
- predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw());
+ predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw()); // 32-element short replicate of an immediate; needs UseAVX > 2 and supports_avx512bw()
match(Set dst (ReplicateS con));
format %{ "movq $dst,[$constantaddress]\n\t"
"vpbroadcastw $dst,$dst\t! replicate32S" %}
ins_encode %{
int vector_len = 2;
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
- __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); // replicate from $dst, which the movq above loaded from $constantaddress
%}
ins_pipe( pipe_slow );
%}
@@ -4103,9 +4506,9 @@
%}
instruct Repl4I_evex(vecX dst, rRegI src) %{
- predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
+ predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
match(Set dst (ReplicateI src));
- format %{ "vpbroadcastd $dst,$src\t! replicate4I" %}
+ format %{ "evpbroadcastd $dst,$src\t! replicate4I" %}
ins_encode %{
int vector_len = 0;
__ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
@@ -4114,20 +4517,20 @@
%}
instruct Repl4I_mem_evex(vecX dst, memory mem) %{
- predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
+ predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); // 4-element int replicate from memory; needs UseAVX > 2 and supports_avx512vl()
match(Set dst (ReplicateI (LoadI mem)));
format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %}
ins_encode %{
int vector_len = 0;
- __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); // replicate the int read from $mem into $dst
%}
ins_pipe( pipe_slow );
%}
instruct Repl8I_evex(vecY dst, rRegI src) %{
- predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
+ predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl());
match(Set dst (ReplicateI src));
- format %{ "vpbroadcastd $dst,$src\t! replicate8I" %}
+ format %{ "evpbroadcastd $dst,$src\t! replicate8I" %}
ins_encode %{
int vector_len = 1;
__ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
@@ -4136,12 +4539,12 @@
%}
instruct Repl8I_mem_evex(vecY dst, memory mem) %{
- predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
+ predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); // 8-element int replicate from memory; needs UseAVX > 2 and supports_avx512vl()
match(Set dst (ReplicateI (LoadI mem)));
format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %}
ins_encode %{
int vector_len = 1;
- __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); // replicate the int read from $mem into $dst
%}
ins_pipe( pipe_slow );
%}
@@ -4149,7 +4552,7 @@
instruct Repl16I_evex(vecZ dst, rRegI src) %{
predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
match(Set dst (ReplicateI src));
- format %{ "vpbroadcastd $dst,$src\t! replicate16I" %}
+ format %{ "evpbroadcastd $dst,$src\t! replicate16I" %}
ins_encode %{
int vector_len = 2;
__ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
@@ -4163,33 +4566,33 @@
format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %}
ins_encode %{
int vector_len = 2;
- __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl4I_imm_evex(vecX dst, immI con) %{
- predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
+ predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); // 4-element int replicate of an immediate; needs UseAVX > 2 and supports_avx512vl()
match(Set dst (ReplicateI con));
format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
"vpbroadcastd $dst,$dst\t! replicate4I" %}
ins_encode %{
int vector_len = 0;
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
- __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); // replicate from $dst, which the movq above loaded from $constantaddress
%}
ins_pipe( pipe_slow );
%}
instruct Repl8I_imm_evex(vecY dst, immI con) %{
- predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
+ predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); // 8-element int replicate of an immediate; needs UseAVX > 2 and supports_avx512vl()
match(Set dst (ReplicateI con));
format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
"vpbroadcastd $dst,$dst\t! replicate8I" %}
ins_encode %{
int vector_len = 1;
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
- __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); // replicate from $dst, which the movq above loaded from $constantaddress
%}
ins_pipe( pipe_slow );
%}
@@ -4202,7 +4605,7 @@
ins_encode %{
int vector_len = 2;
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
- __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4222,9 +4625,9 @@
// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl4L_evex(vecY dst, rRegL src) %{
- predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
+ predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
match(Set dst (ReplicateL src));
- format %{ "vpbroadcastq $dst,$src\t! replicate4L" %}
+ format %{ "evpbroadcastq $dst,$src\t! replicate4L" %}
ins_encode %{
int vector_len = 1;
__ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len);
@@ -4235,7 +4638,7 @@
instruct Repl8L_evex(vecZ dst, rRegL src) %{
predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
match(Set dst (ReplicateL src));
- format %{ "vpbroadcastq $dst,$src\t! replicate8L" %}
+ format %{ "evpbroadcastq $dst,$src\t! replicate8L" %}
ins_encode %{
int vector_len = 2;
__ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len);
@@ -4244,7 +4647,7 @@
%}
#else // _LP64
instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{
- predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
+ predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
match(Set dst (ReplicateL src));
effect(TEMP dst, USE src, TEMP tmp);
format %{ "movdl $dst,$src.lo\n\t"
@@ -4256,12 +4659,12 @@
__ movdl($dst$$XMMRegister, $src$$Register);
__ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
- __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{
+ __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct Repl8L_evex(legVecZ dst, eRegL src, legVecZ tmp) %{
predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
match(Set dst (ReplicateL src));
effect(TEMP dst, USE src, TEMP tmp);
@@ -4274,21 +4677,21 @@
__ movdl($dst$$XMMRegister, $src$$Register);
__ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
- __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
#endif // _LP64
instruct Repl4L_imm_evex(vecY dst, immL con) %{
- predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
+ predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); // 4-element long replicate of an immediate; needs UseAVX > 2 and supports_avx512vl()
match(Set dst (ReplicateL con));
format %{ "movq $dst,[$constantaddress]\n\t"
"vpbroadcastq $dst,$dst\t! replicate4L" %}
ins_encode %{
int vector_len = 1;
__ movq($dst$$XMMRegister, $constantaddress($con));
- __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); // replicate from $dst, which the movq above loaded from $constantaddress
%}
ins_pipe( pipe_slow );
%}
@@ -4301,29 +4704,29 @@
ins_encode %{
int vector_len = 2;
__ movq($dst$$XMMRegister, $constantaddress($con));
- __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct Repl2L_mem_evex(vecX dst, memory mem) %{
- predicate(n->as_Vector()->length() == 2 && VM_Version::supports_avx512vl());
+ predicate(n->as_Vector()->length() == 2 && UseAVX > 2 && VM_Version::supports_avx512vl()); // 2-element long replicate from memory; needs UseAVX > 2 and supports_avx512vl()
match(Set dst (ReplicateL (LoadL mem)));
- format %{ "vpbroadcastd $dst,$mem\t! replicate2L" %}
+ format %{ "vpbroadcastq $dst,$mem\t! replicate2L" %}
ins_encode %{
int vector_len = 0;
- __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); // replicate the long read from $mem into $dst; the format text names the same q-form instruction
%}
ins_pipe( pipe_slow );
%}
instruct Repl4L_mem_evex(vecY dst, memory mem) %{
- predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
+ predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); // 4-element long replicate from memory; needs UseAVX > 2 and supports_avx512vl()
match(Set dst (ReplicateL (LoadL mem)));
- format %{ "vpbroadcastd $dst,$mem\t! replicate4L" %}
+ format %{ "vpbroadcastq $dst,$mem\t! replicate4L" %}
ins_encode %{
int vector_len = 1;
- __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); // replicate the long read from $mem into $dst; the format text names the same q-form instruction
%}
ins_pipe( pipe_slow );
%}
@@ -4334,7 +4737,7 @@
format %{ "vpbroadcastd $dst,$mem\t! replicate8L" %}
ins_encode %{
int vector_len = 2;
- __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4352,23 +4755,23 @@
%}
instruct Repl8F_evex(vecY dst, regF src) %{
- predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
+ predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); // 8-element float replicate from a register; needs UseAVX > 2 and supports_avx512vl()
match(Set dst (ReplicateF src));
- format %{ "vbroadcastss $dst,$src\t! replicate8F" %}
+ format %{ "vpbroadcastss $dst,$src\t! replicate8F" %}
ins_encode %{
int vector_len = 1;
- __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+ __ vpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); // replicate the float in $src into $dst
%}
ins_pipe( pipe_slow );
%}
instruct Repl8F_mem_evex(vecY dst, memory mem) %{
- predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
+ predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); // 8-element float replicate from memory; needs UseAVX > 2 and supports_avx512vl()
match(Set dst (ReplicateF (LoadF mem)));
format %{ "vbroadcastss $dst,$mem\t! replicate8F" %}
ins_encode %{
int vector_len = 1;
- __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); // replicate the float read from $mem into $dst
%}
ins_pipe( pipe_slow );
%}
@@ -4376,10 +4779,10 @@
instruct Repl16F_evex(vecZ dst, regF src) %{
predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
match(Set dst (ReplicateF src));
- format %{ "vbroadcastss $dst,$src\t! replicate16F" %}
+ format %{ "vpbroadcastss $dst,$src\t! replicate16F" %}
ins_encode %{
int vector_len = 2;
- __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+ __ vpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); // 16-element float replicate: the float in $src is replicated into $dst
%}
ins_pipe( pipe_slow );
%}
@@ -4390,7 +4793,7 @@
format %{ "vbroadcastss $dst,$mem\t! replicate16F" %}
ins_encode %{
int vector_len = 2;
- __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4444,23 +4847,23 @@
%}
instruct Repl4D_evex(vecY dst, regD src) %{
- predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
+ predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); // 4-element double replicate from a register; needs UseAVX > 2 and supports_avx512vl()
match(Set dst (ReplicateD src));
- format %{ "vbroadcastsd $dst,$src\t! replicate4D" %}
+ format %{ "vpbroadcastsd $dst,$src\t! replicate4D" %}
ins_encode %{
int vector_len = 1;
- __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+ __ vpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); // replicate the double in $src into $dst
%}
ins_pipe( pipe_slow );
%}
instruct Repl4D_mem_evex(vecY dst, memory mem) %{
- predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
+ predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); // 4-element double replicate from memory; needs UseAVX > 2 and supports_avx512vl()
match(Set dst (ReplicateD (LoadD mem)));
format %{ "vbroadcastsd $dst,$mem\t! replicate4D" %}
ins_encode %{
int vector_len = 1;
- __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); // replicate the double read from $mem into $dst
%}
ins_pipe( pipe_slow );
%}
@@ -4468,10 +4871,10 @@
instruct Repl8D_evex(vecZ dst, regD src) %{
predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
match(Set dst (ReplicateD src));
- format %{ "vbroadcastsd $dst,$src\t! replicate8D" %}
+ format %{ "vpbroadcastsd $dst,$src\t! replicate8D" %}
ins_encode %{
int vector_len = 2;
- __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+ __ vpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); // 8-element double replicate: the double in $src is replicated into $dst
%}
ins_pipe( pipe_slow );
%}
@@ -4482,7 +4885,7 @@
format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %}
ins_encode %{
int vector_len = 2;
- __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
+ __ vpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4525,7 +4928,7 @@
// ====================REDUCTION ARITHMETIC=======================================
-instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
+instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{
predicate(UseSSE > 2 && UseAVX == 0);
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp2, TEMP tmp);
@@ -4544,7 +4947,7 @@
ins_pipe( pipe_slow );
%}
-instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
+instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{
predicate(VM_Version::supports_avxonly());
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
@@ -4562,7 +4965,7 @@
ins_pipe( pipe_slow );
%}
-instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
+instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{
predicate(UseAVX > 2);
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
@@ -4582,7 +4985,7 @@
ins_pipe( pipe_slow );
%}
-instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
+instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{
predicate(UseSSE > 2 && UseAVX == 0);
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
@@ -4603,7 +5006,7 @@
ins_pipe( pipe_slow );
%}
-instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
+instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{
predicate(VM_Version::supports_avxonly());
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
@@ -4623,7 +5026,7 @@
ins_pipe( pipe_slow );
%}
-instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
+instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{
predicate(UseAVX > 2);
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
@@ -4647,7 +5050,7 @@
ins_pipe( pipe_slow );
%}
-instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
+instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{
predicate(VM_Version::supports_avxonly());
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
@@ -4671,7 +5074,7 @@
ins_pipe( pipe_slow );
%}
-instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
+instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{
predicate(UseAVX > 2);
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
@@ -4699,7 +5102,7 @@
ins_pipe( pipe_slow );
%}
-instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
+instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, legVecZ src2, legVecZ tmp, legVecZ tmp2, legVecZ tmp3) %{
predicate(UseAVX > 2);
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
@@ -4731,7 +5134,7 @@
%}
#ifdef _LP64
-instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
+instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, vecX tmp, vecX tmp2) %{
predicate(UseAVX > 2);
match(Set dst (AddReductionVL src1 src2));
effect(TEMP tmp, TEMP tmp2);
@@ -4750,7 +5153,7 @@
ins_pipe( pipe_slow );
%}
-instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
+instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, vecY tmp, vecY tmp2) %{
predicate(UseAVX > 2);
match(Set dst (AddReductionVL src1 src2));
effect(TEMP tmp, TEMP tmp2);
@@ -4773,7 +5176,7 @@
ins_pipe( pipe_slow );
%}
-instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
+instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
predicate(UseAVX > 2);
match(Set dst (AddReductionVL src1 src2));
effect(TEMP tmp, TEMP tmp2);
@@ -4801,7 +5204,7 @@
%}
#endif
-instruct rsadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
+instruct rsadd2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{
predicate(UseSSE >= 1 && UseAVX == 0);
match(Set dst (AddReductionVF dst src2));
effect(TEMP dst, TEMP tmp);
@@ -4816,7 +5219,7 @@
ins_pipe( pipe_slow );
%}
-instruct rvadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
+instruct rvadd2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{
predicate(UseAVX > 0);
match(Set dst (AddReductionVF dst src2));
effect(TEMP dst, TEMP tmp);
@@ -4831,7 +5234,7 @@
ins_pipe( pipe_slow );
%}
-instruct rsadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
+instruct rsadd4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
predicate(UseSSE >= 1 && UseAVX == 0);
match(Set dst (AddReductionVF dst src2));
effect(TEMP dst, TEMP tmp);
@@ -4854,7 +5257,7 @@
ins_pipe( pipe_slow );
%}
-instruct rvadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
+instruct rvadd4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
predicate(UseAVX > 0);
match(Set dst (AddReductionVF dst src2));
effect(TEMP tmp, TEMP dst);
@@ -4877,7 +5280,7 @@
ins_pipe( pipe_slow );
%}
-instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
+instruct radd8F_reduction_reg(regF dst, vecY src2, vecY tmp, vecY tmp2) %{
predicate(UseAVX > 0);
match(Set dst (AddReductionVF dst src2));
effect(TEMP tmp, TEMP dst, TEMP tmp2);
@@ -4916,7 +5319,7 @@
ins_pipe( pipe_slow );
%}
-instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
+instruct radd16F_reduction_reg(regF dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
predicate(UseAVX > 2);
match(Set dst (AddReductionVF dst src2));
effect(TEMP tmp, TEMP dst, TEMP tmp2);
@@ -4987,7 +5390,7 @@
ins_pipe( pipe_slow );
%}
-instruct rsadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
+instruct rsadd2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
predicate(UseSSE >= 1 && UseAVX == 0);
match(Set dst (AddReductionVD dst src2));
effect(TEMP tmp, TEMP dst);
@@ -5002,7 +5405,7 @@
ins_pipe( pipe_slow );
%}
-instruct rvadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
+instruct rvadd2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
predicate(UseAVX > 0);
match(Set dst (AddReductionVD dst src2));
effect(TEMP tmp, TEMP dst);
@@ -5017,14 +5420,14 @@
ins_pipe( pipe_slow );
%}
-instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
+instruct rvadd4D_reduction_reg(regD dst, vecY src2, vecX tmp, vecX tmp2) %{
predicate(UseAVX > 0);
match(Set dst (AddReductionVD dst src2));
effect(TEMP tmp, TEMP dst, TEMP tmp2);
format %{ "vaddsd $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0xE\n\t"
"vaddsd $dst,$dst,$tmp\n\t"
- "vextractf32x4 $tmp2,$src2,0x1\n\t"
+ "vextractf128 $tmp2,$src2,0x1\n\t"
"vaddsd $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vaddsd $dst,$dst,$tmp\t! add reduction4D" %}
@@ -5032,7 +5435,7 @@
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
- __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+ __ vextractf128($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5040,7 +5443,7 @@
ins_pipe( pipe_slow );
%}
-instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
+instruct rvadd8D_reduction_reg(regD dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
predicate(UseAVX > 2);
match(Set dst (AddReductionVD dst src2));
effect(TEMP tmp, TEMP dst, TEMP tmp2);
@@ -5079,7 +5482,7 @@
ins_pipe( pipe_slow );
%}
-instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
+instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{
predicate(UseSSE > 3 && UseAVX == 0);
match(Set dst (MulReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
@@ -5098,7 +5501,7 @@
ins_pipe( pipe_slow );
%}
-instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
+instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{
predicate(UseAVX > 0);
match(Set dst (MulReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
@@ -5118,7 +5521,7 @@
ins_pipe( pipe_slow );
%}
-instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
+instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{
predicate(UseSSE > 3 && UseAVX == 0);
match(Set dst (MulReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
@@ -5141,7 +5544,7 @@
ins_pipe( pipe_slow );
%}
-instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
+instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{
predicate(UseAVX > 0);
match(Set dst (MulReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
@@ -5165,8 +5568,8 @@
ins_pipe( pipe_slow );
%}
-instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
- predicate(UseAVX > 0);
+instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{
+ predicate(UseAVX > 1);
match(Set dst (MulReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "vextracti128_high $tmp,$src2\n\t"
@@ -5193,7 +5596,7 @@
ins_pipe( pipe_slow );
%}
-instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
+instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, legVecZ src2, legVecZ tmp, legVecZ tmp2, legVecZ tmp3) %{
predicate(UseAVX > 2);
match(Set dst (MulReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
@@ -5225,7 +5628,7 @@
%}
#ifdef _LP64
-instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
+instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, vecX tmp, vecX tmp2) %{
predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
match(Set dst (MulReductionVL src1 src2));
effect(TEMP tmp, TEMP tmp2);
@@ -5244,7 +5647,7 @@
ins_pipe( pipe_slow );
%}
-instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
+instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, vecY tmp, vecY tmp2) %{
predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
match(Set dst (MulReductionVL src1 src2));
effect(TEMP tmp, TEMP tmp2);
@@ -5267,7 +5670,7 @@
ins_pipe( pipe_slow );
%}
-instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
+instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
match(Set dst (MulReductionVL src1 src2));
effect(TEMP tmp, TEMP tmp2);
@@ -5295,7 +5698,7 @@
%}
#endif
-instruct rsmul2F_reduction(regF dst, vecD src2, regF tmp) %{
+instruct rsmul2F_reduction(regF dst, vecD src2, vecD tmp) %{
predicate(UseSSE >= 1 && UseAVX == 0);
match(Set dst (MulReductionVF dst src2));
effect(TEMP dst, TEMP tmp);
@@ -5310,7 +5713,7 @@
ins_pipe( pipe_slow );
%}
-instruct rvmul2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
+instruct rvmul2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{
predicate(UseAVX > 0);
match(Set dst (MulReductionVF dst src2));
effect(TEMP tmp, TEMP dst);
@@ -5325,7 +5728,7 @@
ins_pipe( pipe_slow );
%}
-instruct rsmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
+instruct rsmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
predicate(UseSSE >= 1 && UseAVX == 0);
match(Set dst (MulReductionVF dst src2));
effect(TEMP dst, TEMP tmp);
@@ -5348,7 +5751,7 @@
ins_pipe( pipe_slow );
%}
-instruct rvmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
+instruct rvmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
predicate(UseAVX > 0);
match(Set dst (MulReductionVF dst src2));
effect(TEMP tmp, TEMP dst);
@@ -5371,7 +5774,7 @@
ins_pipe( pipe_slow );
%}
-instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
+instruct rvmul8F_reduction_reg(regF dst, vecY src2, vecY tmp, vecY tmp2) %{
predicate(UseAVX > 0);
match(Set dst (MulReductionVF dst src2));
effect(TEMP tmp, TEMP dst, TEMP tmp2);
@@ -5410,7 +5813,7 @@
ins_pipe( pipe_slow );
%}
-instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
+instruct rvmul16F_reduction_reg(regF dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
predicate(UseAVX > 2);
match(Set dst (MulReductionVF dst src2));
effect(TEMP tmp, TEMP dst, TEMP tmp2);
@@ -5481,7 +5884,7 @@
ins_pipe( pipe_slow );
%}
-instruct rsmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
+instruct rsmul2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
predicate(UseSSE >= 1 && UseAVX == 0);
match(Set dst (MulReductionVD dst src2));
effect(TEMP dst, TEMP tmp);
@@ -5496,7 +5899,7 @@
ins_pipe( pipe_slow );
%}
-instruct rvmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
+instruct rvmul2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
predicate(UseAVX > 0);
match(Set dst (MulReductionVD dst src2));
effect(TEMP tmp, TEMP dst);
@@ -5511,7 +5914,7 @@
ins_pipe( pipe_slow );
%}
-instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
+instruct rvmul4D_reduction_reg(regD dst, vecY src2, vecY tmp, vecY tmp2) %{
predicate(UseAVX > 0);
match(Set dst (MulReductionVD dst src2));
effect(TEMP tmp, TEMP dst, TEMP tmp2);
@@ -5534,7 +5937,7 @@
ins_pipe( pipe_slow );
%}
-instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
+instruct rvmul8D_reduction_reg(regD dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
predicate(UseAVX > 2);
match(Set dst (MulReductionVD dst src2));
effect(TEMP tmp, TEMP dst, TEMP tmp2);
@@ -5588,19 +5991,8 @@
ins_pipe( pipe_slow );
%}
-instruct vadd4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
- match(Set dst (AddVB src1 src2));
- format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
match(Set dst (AddVB src1 src2));
format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
ins_encode %{
@@ -5610,20 +6002,9 @@
ins_pipe( pipe_slow );
%}
-instruct vadd4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
- match(Set dst (AddVB dst src2));
- effect(TEMP src1);
- format %{ "vpaddb $dst,$dst,$src2\t! add packed4B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd4B_mem_avx(vecS dst, vecS src, memory mem) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
+
+instruct vadd4B_mem(vecS dst, vecS src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
match(Set dst (AddVB src (LoadVector mem)));
format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
ins_encode %{
@@ -5633,29 +6014,6 @@
ins_pipe( pipe_slow );
%}
-instruct vadd4B_mem_evex(vecS dst, vecS src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
- match(Set dst (AddVB src (LoadVector mem)));
- format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
- match(Set dst (AddVB dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vadd8B(vecD dst, vecD src) %{
predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
match(Set dst (AddVB dst src));
@@ -5666,19 +6024,8 @@
ins_pipe( pipe_slow );
%}
-instruct vadd8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
- match(Set dst (AddVB src1 src2));
- format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
match(Set dst (AddVB src1 src2));
format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
ins_encode %{
@@ -5688,20 +6035,9 @@
ins_pipe( pipe_slow );
%}
-instruct vadd8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
- match(Set dst (AddVB dst src2));
- effect(TEMP src1);
- format %{ "vpaddb $dst,$dst,$src2\t! add packed8B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd8B_mem_avx(vecD dst, vecD src, memory mem) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+
+instruct vadd8B_mem(vecD dst, vecD src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
match(Set dst (AddVB src (LoadVector mem)));
format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
ins_encode %{
@@ -5711,29 +6047,6 @@
ins_pipe( pipe_slow );
%}
-instruct vadd8B_mem_evex(vecD dst, vecD src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
- match(Set dst (AddVB src (LoadVector mem)));
- format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
- match(Set dst (AddVB dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vadd16B(vecX dst, vecX src) %{
predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
match(Set dst (AddVB dst src));
@@ -5744,19 +6057,8 @@
ins_pipe( pipe_slow );
%}
-instruct vadd16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
- match(Set dst (AddVB src1 src2));
- format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
match(Set dst (AddVB src1 src2));
format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
ins_encode %{
@@ -5766,31 +6068,8 @@
ins_pipe( pipe_slow );
%}
-instruct vadd16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
- match(Set dst (AddVB dst src2));
- effect(TEMP src1);
- format %{ "vpaddb $dst,$dst,$src2\t! add packed16B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd16B_mem_avx(vecX dst, vecX src, memory mem) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
- match(Set dst (AddVB src (LoadVector mem)));
- format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd16B_mem_evex(vecX dst, vecX src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
match(Set dst (AddVB src (LoadVector mem)));
format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
ins_encode %{
@@ -5800,31 +6079,8 @@
ins_pipe( pipe_slow );
%}
-instruct vadd16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
- match(Set dst (AddVB dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
- predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
- match(Set dst (AddVB src1 src2));
- format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
- ins_encode %{
- int vector_len = 1;
- __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
match(Set dst (AddVB src1 src2));
format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
ins_encode %{
@@ -5834,20 +6090,8 @@
ins_pipe( pipe_slow );
%}
-instruct vadd32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
- match(Set dst (AddVB dst src2));
- effect(TEMP src1);
- format %{ "vpaddb $dst,$dst,$src2\t! add packed32B" %}
- ins_encode %{
- int vector_len = 1;
- __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd32B_mem_avx(vecY dst, vecY src, memory mem) %{
- predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
+instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
match(Set dst (AddVB src (LoadVector mem)));
format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
ins_encode %{
@@ -5857,31 +6101,8 @@
ins_pipe( pipe_slow );
%}
-instruct vadd32B_mem_evex(vecY dst, vecY src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
- match(Set dst (AddVB src (LoadVector mem)));
- format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
- ins_encode %{
- int vector_len = 1;
- __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
- match(Set dst (AddVB dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
- ins_encode %{
- int vector_len = 1;
- __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
+ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
match(Set dst (AddVB src1 src2));
format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %}
ins_encode %{
@@ -5892,7 +6113,7 @@
%}
instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
+ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
match(Set dst (AddVB src (LoadVector mem)));
format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %}
ins_encode %{
@@ -5913,19 +6134,8 @@
ins_pipe( pipe_slow );
%}
-instruct vadd2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
- match(Set dst (AddVS src1 src2));
- format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
+instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
match(Set dst (AddVS src1 src2));
format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
ins_encode %{
@@ -5935,20 +6145,8 @@
ins_pipe( pipe_slow );
%}
-instruct vadd2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
- match(Set dst (AddVS dst src2));
- effect(TEMP src1);
- format %{ "vpaddw $dst,$dst,$src2\t! add packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd2S_mem_avx(vecS dst, vecS src, memory mem) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
+instruct vadd2S_mem(vecS dst, vecS src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
match(Set dst (AddVS src (LoadVector mem)));
format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
ins_encode %{
@@ -5958,29 +6156,6 @@
ins_pipe( pipe_slow );
%}
-instruct vadd2S_mem_evex(vecS dst, vecS src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
- match(Set dst (AddVS src (LoadVector mem)));
- format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
- match(Set dst (AddVS dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vadd4S(vecD dst, vecD src) %{
predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (AddVS dst src));
@@ -5991,19 +6166,8 @@
ins_pipe( pipe_slow );
%}
-instruct vadd4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
- match(Set dst (AddVS src1 src2));
- format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
match(Set dst (AddVS src1 src2));
format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
ins_encode %{
@@ -6013,20 +6177,8 @@
ins_pipe( pipe_slow );
%}
-instruct vadd4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
- match(Set dst (AddVS dst src2));
- effect(TEMP src1);
- format %{ "vpaddw $dst,$dst,$src2\t! add packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
+instruct vadd4S_mem(vecD dst, vecD src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
match(Set dst (AddVS src (LoadVector mem)));
format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
ins_encode %{
@@ -6036,29 +6188,6 @@
ins_pipe( pipe_slow );
%}
-instruct vadd4S_mem_evex(vecD dst, vecD src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
- match(Set dst (AddVS src (LoadVector mem)));
- format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
- match(Set dst (AddVS dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vadd8S(vecX dst, vecX src) %{
predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
match(Set dst (AddVS dst src));
@@ -6069,19 +6198,8 @@
ins_pipe( pipe_slow );
%}
-instruct vadd8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
- match(Set dst (AddVS src1 src2));
- format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
match(Set dst (AddVS src1 src2));
format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
ins_encode %{
@@ -6091,31 +6209,8 @@
ins_pipe( pipe_slow );
%}
-instruct vadd8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
- match(Set dst (AddVS dst src2));
- effect(TEMP src1);
- format %{ "vpaddw $dst,$dst,$src2\t! add packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd8S_mem_avx(vecX dst, vecX src, memory mem) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
- match(Set dst (AddVS src (LoadVector mem)));
- format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd8S_mem_evex(vecX dst, vecX src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
match(Set dst (AddVS src (LoadVector mem)));
format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
ins_encode %{
@@ -6125,31 +6220,8 @@
ins_pipe( pipe_slow );
%}
-instruct vadd8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
- match(Set dst (AddVS dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
- predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
- match(Set dst (AddVS src1 src2));
- format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
match(Set dst (AddVS src1 src2));
format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
ins_encode %{
@@ -6159,20 +6231,8 @@
ins_pipe( pipe_slow );
%}
-instruct vadd16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
- match(Set dst (AddVS dst src2));
- effect(TEMP src1);
- format %{ "vpaddw $dst,$dst,$src2\t! add packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd16S_mem_avx(vecY dst, vecY src, memory mem) %{
- predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
+instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
match(Set dst (AddVS src (LoadVector mem)));
format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
ins_encode %{
@@ -6182,31 +6242,8 @@
ins_pipe( pipe_slow );
%}
-instruct vadd16S_mem_evex(vecY dst, vecY src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
- match(Set dst (AddVS src (LoadVector mem)));
- format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vadd16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
- match(Set dst (AddVS dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
match(Set dst (AddVS src1 src2));
format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %}
ins_encode %{
@@ -6217,7 +6254,7 @@
%}
instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
match(Set dst (AddVS src (LoadVector mem)));
format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %}
ins_encode %{
@@ -6229,7 +6266,7 @@
// Integers vector add
instruct vadd2I(vecD dst, vecD src) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (AddVI dst src));
format %{ "paddd $dst,$src\t! add packed2I" %}
ins_encode %{
@@ -6261,7 +6298,7 @@
%}
instruct vadd4I(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length() == 4);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (AddVI dst src));
format %{ "paddd $dst,$src\t! add packed4I" %}
ins_encode %{
@@ -6338,7 +6375,7 @@
// Longs vector add
instruct vadd2L(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (AddVL dst src));
format %{ "paddq $dst,$src\t! add packed2L" %}
ins_encode %{
@@ -6415,7 +6452,7 @@
// Floats vector add
instruct vadd2F(vecD dst, vecD src) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (AddVF dst src));
format %{ "addps $dst,$src\t! add packed2F" %}
ins_encode %{
@@ -6447,7 +6484,7 @@
%}
instruct vadd4F(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length() == 4);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (AddVF dst src));
format %{ "addps $dst,$src\t! add packed4F" %}
ins_encode %{
@@ -6524,7 +6561,7 @@
// Doubles vector add
instruct vadd2D(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (AddVD dst src));
format %{ "addpd $dst,$src\t! add packed2D" %}
ins_encode %{
@@ -6612,19 +6649,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsub4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
- match(Set dst (SubVB src1 src2));
- format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
match(Set dst (SubVB src1 src2));
format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
ins_encode %{
@@ -6634,20 +6660,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsub4B_reg_exex_special(vecS dst, vecS src1, vecS src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
- match(Set dst (SubVB dst src2));
- effect(TEMP src1);
- format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub4B_mem_avx(vecS dst, vecS src, memory mem) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
+instruct vsub4B_mem(vecS dst, vecS src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
match(Set dst (SubVB src (LoadVector mem)));
format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
ins_encode %{
@@ -6657,29 +6671,6 @@
ins_pipe( pipe_slow );
%}
-instruct vsub4B_mem_evex(vecS dst, vecS src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
- match(Set dst (SubVB src (LoadVector mem)));
- format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
- match(Set dst (SubVB dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vsub8B(vecD dst, vecD src) %{
predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
match(Set dst (SubVB dst src));
@@ -6690,19 +6681,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsub8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
- match(Set dst (SubVB src1 src2));
- format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
match(Set dst (SubVB src1 src2));
format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
ins_encode %{
@@ -6712,20 +6692,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsub8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
- match(Set dst (SubVB dst src2));
- effect(TEMP src1);
- format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub8B_mem_avx(vecD dst, vecD src, memory mem) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+instruct vsub8B_mem(vecD dst, vecD src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
match(Set dst (SubVB src (LoadVector mem)));
format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
ins_encode %{
@@ -6735,29 +6703,6 @@
ins_pipe( pipe_slow );
%}
-instruct vsub8B_mem_evex(vecD dst, vecD src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
- match(Set dst (SubVB src (LoadVector mem)));
- format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
- match(Set dst (SubVB dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vsub16B(vecX dst, vecX src) %{
predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
match(Set dst (SubVB dst src));
@@ -6768,19 +6713,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsub16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
- match(Set dst (SubVB src1 src2));
- format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
match(Set dst (SubVB src1 src2));
format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
ins_encode %{
@@ -6790,31 +6724,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsub16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
- match(Set dst (SubVB dst src2));
- effect(TEMP src1);
- format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub16B_mem_avx(vecX dst, vecX src, memory mem) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
- match(Set dst (SubVB src (LoadVector mem)));
- format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub16B_mem_evex(vecX dst, vecX src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
match(Set dst (SubVB src (LoadVector mem)));
format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
ins_encode %{
@@ -6824,31 +6735,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsub16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
- match(Set dst (SubVB dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
- predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
- match(Set dst (SubVB src1 src2));
- format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
match(Set dst (SubVB src1 src2));
format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
ins_encode %{
@@ -6858,20 +6746,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsub32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
- match(Set dst (SubVB dst src2));
- effect(TEMP src1);
- format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub32B_mem_avx(vecY dst, vecY src, memory mem) %{
- predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
+instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
match(Set dst (SubVB src (LoadVector mem)));
format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
ins_encode %{
@@ -6881,31 +6757,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsub32B_mem_evex(vecY dst, vecY src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
- match(Set dst (SubVB src (LoadVector mem)));
- format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
- match(Set dst (SubVB dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
+ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
match(Set dst (SubVB src1 src2));
format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %}
ins_encode %{
@@ -6916,7 +6769,7 @@
%}
instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
+ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
match(Set dst (SubVB src (LoadVector mem)));
format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %}
ins_encode %{
@@ -6937,19 +6790,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsub2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
- match(Set dst (SubVS src1 src2));
- format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
+instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
match(Set dst (SubVS src1 src2));
format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
ins_encode %{
@@ -6959,20 +6801,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsub2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
- match(Set dst (SubVS dst src2));
- effect(TEMP src1);
- format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub2S_mem_avx(vecS dst, vecS src, memory mem) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
+instruct vsub2S_mem(vecS dst, vecS src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
match(Set dst (SubVS src (LoadVector mem)));
format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
ins_encode %{
@@ -6982,29 +6812,6 @@
ins_pipe( pipe_slow );
%}
-instruct vsub2S_mem_evex(vecS dst, vecS src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
- match(Set dst (SubVS src (LoadVector mem)));
- format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
- match(Set dst (SubVS dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vsub4S(vecD dst, vecD src) %{
predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (SubVS dst src));
@@ -7015,19 +6822,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsub4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
- match(Set dst (SubVS src1 src2));
- format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
match(Set dst (SubVS src1 src2));
format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
ins_encode %{
@@ -7037,20 +6833,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsub4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
- match(Set dst (SubVS dst src2));
- effect(TEMP src1);
- format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub4S_mem_avx(vecD dst, vecD src, memory mem) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
+instruct vsub4S_mem(vecD dst, vecD src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
match(Set dst (SubVS src (LoadVector mem)));
format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
ins_encode %{
@@ -7060,29 +6844,6 @@
ins_pipe( pipe_slow );
%}
-instruct vsub4S_mem_evex(vecD dst, vecD src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
- match(Set dst (SubVS src (LoadVector mem)));
- format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
- match(Set dst (SubVS dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vsub8S(vecX dst, vecX src) %{
predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
match(Set dst (SubVS dst src));
@@ -7093,19 +6854,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsub8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
- match(Set dst (SubVS src1 src2));
- format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
match(Set dst (SubVS src1 src2));
format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
ins_encode %{
@@ -7115,31 +6865,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsub8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
- match(Set dst (SubVS dst src2));
- effect(TEMP src1);
- format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub8S_mem_avx(vecX dst, vecX src, memory mem) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
- match(Set dst (SubVS src (LoadVector mem)));
- format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub8S_mem_evex(vecX dst, vecX src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
match(Set dst (SubVS src (LoadVector mem)));
format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
ins_encode %{
@@ -7149,31 +6876,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsub8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
- match(Set dst (SubVS dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
- predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
- match(Set dst (SubVS src1 src2));
- format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
match(Set dst (SubVS src1 src2));
format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
ins_encode %{
@@ -7183,20 +6887,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsub16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
- match(Set dst (SubVS dst src2));
- effect(TEMP src1);
- format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub16S_mem_avx(vecY dst, vecY src, memory mem) %{
- predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
+instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
match(Set dst (SubVS src (LoadVector mem)));
format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
ins_encode %{
@@ -7206,31 +6898,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsub16S_mem_evex(vecY dst, vecY src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
- match(Set dst (SubVS src (LoadVector mem)));
- format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsub16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
- match(Set dst (SubVS dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
match(Set dst (SubVS src1 src2));
format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %}
ins_encode %{
@@ -7241,7 +6910,7 @@
%}
instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
match(Set dst (SubVS src (LoadVector mem)));
format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %}
ins_encode %{
@@ -7253,7 +6922,7 @@
// Integers vector sub
instruct vsub2I(vecD dst, vecD src) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (SubVI dst src));
format %{ "psubd $dst,$src\t! sub packed2I" %}
ins_encode %{
@@ -7285,7 +6954,7 @@
%}
instruct vsub4I(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length() == 4);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (SubVI dst src));
format %{ "psubd $dst,$src\t! sub packed4I" %}
ins_encode %{
@@ -7362,7 +7031,7 @@
// Longs vector sub
instruct vsub2L(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (SubVL dst src));
format %{ "psubq $dst,$src\t! sub packed2L" %}
ins_encode %{
@@ -7439,7 +7108,7 @@
// Floats vector sub
instruct vsub2F(vecD dst, vecD src) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (SubVF dst src));
format %{ "subps $dst,$src\t! sub packed2F" %}
ins_encode %{
@@ -7471,7 +7140,7 @@
%}
instruct vsub4F(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length() == 4);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (SubVF dst src));
format %{ "subps $dst,$src\t! sub packed4F" %}
ins_encode %{
@@ -7548,7 +7217,7 @@
// Doubles vector sub
instruct vsub2D(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (SubVD dst src));
format %{ "subpd $dst,$src\t! sub packed2D" %}
ins_encode %{
@@ -7636,19 +7305,8 @@
ins_pipe( pipe_slow );
%}
-instruct vmul2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
- match(Set dst (MulVS src1 src2));
- format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vmul2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
+instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
match(Set dst (MulVS src1 src2));
format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
ins_encode %{
@@ -7658,20 +7316,8 @@
ins_pipe( pipe_slow );
%}
-instruct vmul2S_evex_special(vecS dst, vecS src1, vecS src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
- match(Set dst (MulVS dst src2));
- effect(TEMP src1);
- format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vmul2S_mem_avx(vecS dst, vecS src, memory mem) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
+instruct vmul2S_mem(vecS dst, vecS src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
match(Set dst (MulVS src (LoadVector mem)));
format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
ins_encode %{
@@ -7681,29 +7327,6 @@
ins_pipe( pipe_slow );
%}
-instruct vmul2S_mem_evex(vecS dst, vecS src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
- match(Set dst (MulVS src (LoadVector mem)));
- format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vmul2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
- match(Set dst (MulVS dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vmul4S(vecD dst, vecD src) %{
predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (MulVS dst src));
@@ -7714,19 +7337,8 @@
ins_pipe( pipe_slow );
%}
-instruct vmul4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
- match(Set dst (MulVS src1 src2));
- format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vmul4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
match(Set dst (MulVS src1 src2));
format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
ins_encode %{
@@ -7736,20 +7348,8 @@
ins_pipe( pipe_slow );
%}
-instruct vmul4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
- match(Set dst (MulVS dst src2));
- effect(TEMP src1);
- format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vmul4S_mem_avx(vecD dst, vecD src, memory mem) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
+instruct vmul4S_mem(vecD dst, vecD src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
match(Set dst (MulVS src (LoadVector mem)));
format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
ins_encode %{
@@ -7759,29 +7359,6 @@
ins_pipe( pipe_slow );
%}
-instruct vmul4S_mem_evex(vecD dst, vecD src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
- match(Set dst (MulVS src (LoadVector mem)));
- format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vmul4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
- match(Set dst (MulVS dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vmul8S(vecX dst, vecX src) %{
predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
match(Set dst (MulVS dst src));
@@ -7792,19 +7369,8 @@
ins_pipe( pipe_slow );
%}
-instruct vmul8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
- match(Set dst (MulVS src1 src2));
- format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vmul8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
match(Set dst (MulVS src1 src2));
format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
ins_encode %{
@@ -7814,31 +7380,8 @@
ins_pipe( pipe_slow );
%}
-instruct vmul8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
- match(Set dst (MulVS dst src2));
- effect(TEMP src1);
- format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vmul8S_mem_avx(vecX dst, vecX src, memory mem) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
- match(Set dst (MulVS src (LoadVector mem)));
- format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vmul8S_mem_evex(vecX dst, vecX src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
match(Set dst (MulVS src (LoadVector mem)));
format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
ins_encode %{
@@ -7848,31 +7391,8 @@
ins_pipe( pipe_slow );
%}
-instruct vmul8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
- match(Set dst (MulVS dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vmul16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
- predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
- match(Set dst (MulVS src1 src2));
- format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vmul16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
match(Set dst (MulVS src1 src2));
format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
ins_encode %{
@@ -7882,20 +7402,8 @@
ins_pipe( pipe_slow );
%}
-instruct vmul16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
- match(Set dst (MulVS dst src2));
- effect(TEMP src1);
- format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vmul16S_mem_avx(vecY dst, vecY src, memory mem) %{
- predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
+instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
match(Set dst (MulVS src (LoadVector mem)));
format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
ins_encode %{
@@ -7905,31 +7413,8 @@
ins_pipe( pipe_slow );
%}
-instruct vmul16S_mem_evex(vecY dst, vecY src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
- match(Set dst (MulVS src (LoadVector mem)));
- format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vmul16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
- match(Set dst (MulVS dst (LoadVector mem)));
- effect(TEMP src);
- format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
match(Set dst (MulVS src1 src2));
format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %}
ins_encode %{
@@ -7940,7 +7425,7 @@
%}
instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
match(Set dst (MulVS src (LoadVector mem)));
format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %}
ins_encode %{
@@ -8127,7 +7612,7 @@
// Floats vector mul
instruct vmul2F(vecD dst, vecD src) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (MulVF dst src));
format %{ "mulps $dst,$src\t! mul packed2F" %}
ins_encode %{
@@ -8159,7 +7644,7 @@
%}
instruct vmul4F(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length() == 4);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (MulVF dst src));
format %{ "mulps $dst,$src\t! mul packed4F" %}
ins_encode %{
@@ -8236,7 +7721,7 @@
// Doubles vector mul
instruct vmul2D(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (MulVD dst src));
format %{ "mulpd $dst,$src\t! mul packed2D" %}
ins_encode %{
@@ -8311,8 +7796,8 @@
ins_pipe( pipe_slow );
%}
-instruct vcmov8F_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
- predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 8);
+instruct vcmov8F_reg(legVecY dst, legVecY src1, legVecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2)));
effect(TEMP dst, USE src1, USE src2);
format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t"
@@ -8327,8 +7812,8 @@
ins_pipe( pipe_slow );
%}
-instruct vcmov4D_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
- predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 4);
+instruct vcmov4D_reg(legVecY dst, legVecY src1, legVecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2)));
effect(TEMP dst, USE src1, USE src2);
format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t"
@@ -8347,7 +7832,7 @@
// Floats vector div
instruct vdiv2F(vecD dst, vecD src) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (DivVF dst src));
format %{ "divps $dst,$src\t! div packed2F" %}
ins_encode %{
@@ -8379,7 +7864,7 @@
%}
instruct vdiv4F(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length() == 4);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (DivVF dst src));
format %{ "divps $dst,$src\t! div packed4F" %}
ins_encode %{
@@ -8456,7 +7941,7 @@
// Doubles vector div
instruct vdiv2D(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (DivVD dst src));
format %{ "divpd $dst,$src\t! div packed2D" %}
ins_encode %{
@@ -8725,19 +8210,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsll2S_reg_avx(vecS dst, vecS src, vecS shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
- match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll2S_reg_evex(vecS dst, vecS src, vecS shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
+instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
match(Set dst (LShiftVS src shift));
format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
ins_encode %{
@@ -8747,20 +8221,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsll2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
- match(Set dst (LShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
+instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
match(Set dst (LShiftVS src shift));
format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
ins_encode %{
@@ -8770,29 +8232,6 @@
ins_pipe( pipe_slow );
%}
-instruct vsll2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
- match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
- match(Set dst (LShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vsll4S(vecD dst, vecS shift) %{
predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (LShiftVS dst shift));
@@ -8813,19 +8252,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsll4S_reg_avx(vecD dst, vecD src, vecS shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
- match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll4S_reg_evex(vecD dst, vecD src, vecS shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
match(Set dst (LShiftVS src shift));
format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
ins_encode %{
@@ -8835,20 +8263,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsll4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
- match(Set dst (LShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
+instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
match(Set dst (LShiftVS src shift));
format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
ins_encode %{
@@ -8858,29 +8274,6 @@
ins_pipe( pipe_slow );
%}
-instruct vsll4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
- match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
- match(Set dst (LShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vsll8S(vecX dst, vecS shift) %{
predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
match(Set dst (LShiftVS dst shift));
@@ -8901,19 +8294,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsll8S_reg_avx(vecX dst, vecX src, vecS shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
- match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll8S_reg_evex(vecX dst, vecX src, vecS shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
match(Set dst (LShiftVS src shift));
format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
ins_encode %{
@@ -8923,31 +8305,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsll8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
- match(Set dst (LShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
- match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
match(Set dst (LShiftVS src shift));
format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
ins_encode %{
@@ -8957,31 +8316,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsll8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
- match(Set dst (LShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll16S_reg_avx(vecY dst, vecY src, vecS shift) %{
- predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
- match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll16S_reg_evex(vecY dst, vecY src, vecS shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
match(Set dst (LShiftVS src shift));
format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
ins_encode %{
@@ -8991,20 +8327,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsll16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
- match(Set dst (LShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
- predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
+instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
match(Set dst (LShiftVS src shift));
format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
ins_encode %{
@@ -9014,31 +8338,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsll16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
- match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
- match(Set dst (LShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
match(Set dst (LShiftVS src shift));
format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
ins_encode %{
@@ -9049,7 +8350,7 @@
%}
instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
match(Set dst (LShiftVS src shift));
format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
ins_encode %{
@@ -9061,7 +8362,7 @@
// Integers vector left shift
instruct vsll2I(vecD dst, vecS shift) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (LShiftVI dst shift));
format %{ "pslld $dst,$shift\t! left shift packed2I" %}
ins_encode %{
@@ -9071,7 +8372,7 @@
%}
instruct vsll2I_imm(vecD dst, immI8 shift) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (LShiftVI dst shift));
format %{ "pslld $dst,$shift\t! left shift packed2I" %}
ins_encode %{
@@ -9103,7 +8404,7 @@
%}
instruct vsll4I(vecX dst, vecS shift) %{
- predicate(n->as_Vector()->length() == 4);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (LShiftVI dst shift));
format %{ "pslld $dst,$shift\t! left shift packed4I" %}
ins_encode %{
@@ -9113,7 +8414,7 @@
%}
instruct vsll4I_imm(vecX dst, immI8 shift) %{
- predicate(n->as_Vector()->length() == 4);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (LShiftVI dst shift));
format %{ "pslld $dst,$shift\t! left shift packed4I" %}
ins_encode %{
@@ -9190,7 +8491,7 @@
// Longs vector left shift
instruct vsll2L(vecX dst, vecS shift) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (LShiftVL dst shift));
format %{ "psllq $dst,$shift\t! left shift packed2L" %}
ins_encode %{
@@ -9200,7 +8501,7 @@
%}
instruct vsll2L_imm(vecX dst, immI8 shift) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (LShiftVL dst shift));
format %{ "psllq $dst,$shift\t! left shift packed2L" %}
ins_encode %{
@@ -9302,19 +8603,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsrl2S_reg_avx(vecS dst, vecS src, vecS shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
- match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl2S_reg_evex(vecS dst, vecS src, vecS shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
+instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
match(Set dst (URShiftVS src shift));
format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
ins_encode %{
@@ -9324,20 +8614,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsrl2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
- match(Set dst (URShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
+instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
match(Set dst (URShiftVS src shift));
format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
ins_encode %{
@@ -9347,29 +8625,6 @@
ins_pipe( pipe_slow );
%}
-instruct vsrl2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
- match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
- match(Set dst (URShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vsrl4S(vecD dst, vecS shift) %{
predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (URShiftVS dst shift));
@@ -9390,19 +8645,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsrl4S_reg_avx(vecD dst, vecD src, vecS shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
- match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl4S_reg_evex(vecD dst, vecD src, vecS shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
match(Set dst (URShiftVS src shift));
format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
ins_encode %{
@@ -9412,20 +8656,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsrl4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
- match(Set dst (URShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
+instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
match(Set dst (URShiftVS src shift));
format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
ins_encode %{
@@ -9435,29 +8667,6 @@
ins_pipe( pipe_slow );
%}
-instruct vsrl4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
- match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
- match(Set dst (URShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vsrl8S(vecX dst, vecS shift) %{
predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
match(Set dst (URShiftVS dst shift));
@@ -9478,19 +8687,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsrl8S_reg_avx(vecX dst, vecX src, vecS shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
- match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl8S_reg_evex(vecX dst, vecX src, vecS shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
match(Set dst (URShiftVS src shift));
format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
ins_encode %{
@@ -9500,31 +8698,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsrl8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
- match(Set dst (URShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
- match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
match(Set dst (URShiftVS src shift));
format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
ins_encode %{
@@ -9534,31 +8709,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsrl8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
- match(Set dst (URShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl16S_reg_avx(vecY dst, vecY src, vecS shift) %{
- predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
- match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl16S_reg_evex(vecY dst, vecY src, vecS shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
match(Set dst (URShiftVS src shift));
format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
ins_encode %{
@@ -9568,20 +8720,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsrl16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
- match(Set dst (URShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
- predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
+instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
match(Set dst (URShiftVS src shift));
format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
ins_encode %{
@@ -9591,31 +8731,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsrl16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
- match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
- match(Set dst (URShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
match(Set dst (URShiftVS src shift));
format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
ins_encode %{
@@ -9626,7 +8743,7 @@
%}
instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
match(Set dst (URShiftVS src shift));
format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
ins_encode %{
@@ -9638,7 +8755,7 @@
// Integers vector logical right shift
instruct vsrl2I(vecD dst, vecS shift) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (URShiftVI dst shift));
format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
ins_encode %{
@@ -9648,7 +8765,7 @@
%}
instruct vsrl2I_imm(vecD dst, immI8 shift) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (URShiftVI dst shift));
format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
ins_encode %{
@@ -9680,7 +8797,7 @@
%}
instruct vsrl4I(vecX dst, vecS shift) %{
- predicate(n->as_Vector()->length() == 4);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (URShiftVI dst shift));
format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
ins_encode %{
@@ -9690,7 +8807,7 @@
%}
instruct vsrl4I_imm(vecX dst, immI8 shift) %{
- predicate(n->as_Vector()->length() == 4);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (URShiftVI dst shift));
format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
ins_encode %{
@@ -9767,7 +8884,7 @@
// Longs vector logical right shift
instruct vsrl2L(vecX dst, vecS shift) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (URShiftVL dst shift));
format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
ins_encode %{
@@ -9777,7 +8894,7 @@
%}
instruct vsrl2L_imm(vecX dst, immI8 shift) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (URShiftVL dst shift));
format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
ins_encode %{
@@ -9866,7 +8983,7 @@
%}
instruct vsra2S_imm(vecS dst, immI8 shift) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (RShiftVS dst shift));
format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
ins_encode %{
@@ -9875,19 +8992,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsra2S_reg_avx(vecS dst, vecS src, vecS shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
- match(Set dst (RShiftVS src shift));
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra2S_reg_evex(vecS dst, vecS src, vecS shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
+instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
match(Set dst (RShiftVS src shift));
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
ins_encode %{
@@ -9897,20 +9003,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsra2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
- match(Set dst (RShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
+instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
match(Set dst (RShiftVS src shift));
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
ins_encode %{
@@ -9920,29 +9014,6 @@
ins_pipe( pipe_slow );
%}
-instruct vsra2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
- match(Set dst (RShiftVS src shift));
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
- match(Set dst (RShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vsra4S(vecD dst, vecS shift) %{
predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (RShiftVS dst shift));
@@ -9963,19 +9034,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsra4S_reg_avx(vecD dst, vecD src, vecS shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
- match(Set dst (RShiftVS src shift));
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra4S_reg_evex(vecD dst, vecD src, vecS shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
+instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
match(Set dst (RShiftVS src shift));
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
ins_encode %{
@@ -9985,20 +9045,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsra4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
- match(Set dst (RShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
+instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
match(Set dst (RShiftVS src shift));
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
ins_encode %{
@@ -10008,29 +9056,6 @@
ins_pipe( pipe_slow );
%}
-instruct vsra4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
- match(Set dst (RShiftVS src shift));
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
- match(Set dst (RShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vsra8S(vecX dst, vecS shift) %{
predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
match(Set dst (RShiftVS dst shift));
@@ -10051,19 +9076,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsra8S_reg_avx(vecX dst, vecX src, vecS shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
- match(Set dst (RShiftVS src shift));
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra8S_reg_evex(vecX dst, vecX src, vecS shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
match(Set dst (RShiftVS src shift));
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
ins_encode %{
@@ -10073,31 +9087,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsra8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
- match(Set dst (RShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
- match(Set dst (RShiftVS src shift));
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
+instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
match(Set dst (RShiftVS src shift));
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
ins_encode %{
@@ -10107,31 +9098,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsra8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
- match(Set dst (RShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra16S_reg_avx(vecY dst, vecY src, vecS shift) %{
- predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
- match(Set dst (RShiftVS src shift));
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra16S_reg_evex(vecY dst, vecY src, vecS shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
+instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
match(Set dst (RShiftVS src shift));
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
ins_encode %{
@@ -10141,20 +9109,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsra16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
- match(Set dst (RShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
- predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
+instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
match(Set dst (RShiftVS src shift));
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
ins_encode %{
@@ -10164,31 +9120,8 @@
ins_pipe( pipe_slow );
%}
-instruct vsra16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
- match(Set dst (RShiftVS src shift));
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
- predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
- match(Set dst (RShiftVS dst shift));
- effect(TEMP src);
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
match(Set dst (RShiftVS src shift));
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
ins_encode %{
@@ -10199,7 +9132,7 @@
%}
instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
- predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
match(Set dst (RShiftVS src shift));
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
ins_encode %{
@@ -10211,7 +9144,7 @@
// Integers vector arithmetic right shift
instruct vsra2I(vecD dst, vecS shift) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (RShiftVI dst shift));
format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
ins_encode %{
@@ -10221,7 +9154,7 @@
%}
instruct vsra2I_imm(vecD dst, immI8 shift) %{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
match(Set dst (RShiftVI dst shift));
format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
ins_encode %{
@@ -10253,7 +9186,7 @@
%}
instruct vsra4I(vecX dst, vecS shift) %{
- predicate(n->as_Vector()->length() == 4);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (RShiftVI dst shift));
format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
ins_encode %{
@@ -10263,7 +9196,7 @@
%}
instruct vsra4I_imm(vecX dst, immI8 shift) %{
- predicate(n->as_Vector()->length() == 4);
+ predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
match(Set dst (RShiftVI dst shift));
format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
ins_encode %{
@@ -10344,7 +9277,7 @@
// --------------------------------- AND --------------------------------------
instruct vand4B(vecS dst, vecS src) %{
- predicate(n->as_Vector()->length_in_bytes() == 4);
+ predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4);
match(Set dst (AndV dst src));
format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
ins_encode %{
@@ -10376,7 +9309,7 @@
%}
instruct vand8B(vecD dst, vecD src) %{
- predicate(n->as_Vector()->length_in_bytes() == 8);
+ predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8);
match(Set dst (AndV dst src));
format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
ins_encode %{
@@ -10408,7 +9341,7 @@
%}
instruct vand16B(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length_in_bytes() == 16);
+ predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16);
match(Set dst (AndV dst src));
format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
ins_encode %{
@@ -10486,7 +9419,7 @@
// --------------------------------- OR ---------------------------------------
instruct vor4B(vecS dst, vecS src) %{
- predicate(n->as_Vector()->length_in_bytes() == 4);
+ predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4);
match(Set dst (OrV dst src));
format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
ins_encode %{
@@ -10518,7 +9451,7 @@
%}
instruct vor8B(vecD dst, vecD src) %{
- predicate(n->as_Vector()->length_in_bytes() == 8);
+ predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8);
match(Set dst (OrV dst src));
format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
ins_encode %{
@@ -10550,7 +9483,7 @@
%}
instruct vor16B(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length_in_bytes() == 16);
+ predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16);
match(Set dst (OrV dst src));
format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
ins_encode %{
@@ -10628,7 +9561,7 @@
// --------------------------------- XOR --------------------------------------
instruct vxor4B(vecS dst, vecS src) %{
- predicate(n->as_Vector()->length_in_bytes() == 4);
+ predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4);
match(Set dst (XorV dst src));
format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
ins_encode %{
@@ -10660,7 +9593,7 @@
%}
instruct vxor8B(vecD dst, vecD src) %{
- predicate(n->as_Vector()->length_in_bytes() == 8);
+ predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8);
match(Set dst (XorV dst src));
format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
ins_encode %{
@@ -10692,7 +9625,7 @@
%}
instruct vxor16B(vecX dst, vecX src) %{
- predicate(n->as_Vector()->length_in_bytes() == 16);
+ predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16);
match(Set dst (XorV dst src));
format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
ins_encode %{
--- a/src/hotspot/cpu/x86/x86_32.ad Mon Sep 24 13:51:22 2018 -0700
+++ b/src/hotspot/cpu/x86/x86_32.ad Mon Sep 24 16:37:28 2018 -0700
@@ -4101,6 +4101,15 @@
interface(REG_INTER);
%}
+// Float register operands allocated in the float_reg_vl register class
+operand vlRegF() %{
+ constraint(ALLOC_IN_RC(float_reg_vl));
+ match(RegF);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
// XMM Double register operands
operand regD() %{
predicate( UseSSE>=2 );
@@ -4110,6 +4119,15 @@
interface(REG_INTER);
%}
+// Double register operands allocated in the double_reg_vl register class
+operand vlRegD() %{
+ constraint(ALLOC_IN_RC(double_reg_vl));
+ match(RegD);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
// Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM)
// runtime code generation via reg_class_dynamic.
operand vecS() %{
@@ -4120,6 +4138,14 @@
interface(REG_INTER);
%}
+operand legVecS() %{
+ constraint(ALLOC_IN_RC(vectors_reg_legacy));
+ match(VecS);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
operand vecD() %{
constraint(ALLOC_IN_RC(vectord_reg_legacy));
match(VecD);
@@ -4128,6 +4154,14 @@
interface(REG_INTER);
%}
+operand legVecD() %{
+ constraint(ALLOC_IN_RC(vectord_reg_legacy));
+ match(VecD);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
operand vecX() %{
constraint(ALLOC_IN_RC(vectorx_reg_legacy));
match(VecX);
@@ -4136,6 +4170,14 @@
interface(REG_INTER);
%}
+operand legVecX() %{
+ constraint(ALLOC_IN_RC(vectorx_reg_legacy));
+ match(VecX);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
operand vecY() %{
constraint(ALLOC_IN_RC(vectory_reg_legacy));
match(VecY);
@@ -4144,6 +4186,14 @@
interface(REG_INTER);
%}
+operand legVecY() %{
+ constraint(ALLOC_IN_RC(vectory_reg_legacy));
+ match(VecY);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
//----------Memory Operands----------------------------------------------------
// Direct Memory Operand
operand direct(immP addr) %{
@@ -6515,6 +6565,26 @@
ins_pipe( pipe_slow );
%}
+// Move Double: register-to-register copy from a regD operand into a vlRegD operand
+instruct MoveD2VL(vlRegD dst, regD src) %{
+ match(Set dst src);
+ format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
+ ins_encode %{
+ __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+// Move Double: register-to-register copy from a vlRegD operand into a regD operand
+instruct MoveVL2D(regD dst, vlRegD src) %{
+ match(Set dst src);
+ format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
+ ins_encode %{
+ __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
@@ -6528,6 +6598,26 @@
ins_pipe( pipe_slow );
%}
+// Move Float: register-to-register copy from a regF operand into a vlRegF operand
+instruct MoveF2VL(vlRegF dst, regF src) %{
+ match(Set dst src);
+ format %{ "movss $dst,$src\t! load float (4 bytes)" %}
+ ins_encode %{
+ __ movflt($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+// Move Float: register-to-register copy from a vlRegF operand into a regF operand
+instruct MoveVL2F(regF dst, vlRegF src) %{
+ match(Set dst src);
+ format %{ "movss $dst,$src\t! load float (4 bytes)" %}
+ ins_encode %{
+ __ movflt($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
// Store Float
instruct storeFPR( memory mem, regFPR1 src) %{
predicate(UseSSE==0);
--- a/src/hotspot/cpu/x86/x86_64.ad Mon Sep 24 13:51:22 2018 -0700
+++ b/src/hotspot/cpu/x86/x86_64.ad Mon Sep 24 16:37:28 2018 -0700
@@ -3656,6 +3656,15 @@
interface(REG_INTER);
%}
+// Float register operands allocated in the float_reg_vl register class
+operand vlRegF() %{
+ constraint(ALLOC_IN_RC(float_reg_vl));
+ match(RegF);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
// Double register operands
operand regD() %{
constraint(ALLOC_IN_RC(double_reg));
@@ -3665,9 +3674,27 @@
interface(REG_INTER);
%}
+// Double register operands allocated in the double_reg_vl register class
+operand vlRegD() %{
+ constraint(ALLOC_IN_RC(double_reg_vl));
+ match(RegD);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
// Vectors
operand vecS() %{
- constraint(ALLOC_IN_RC(vectors_reg));
+ constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
+ match(VecS);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+// Vectors allocated in the vectors_reg_legacy register class
+operand legVecS() %{
+ constraint(ALLOC_IN_RC(vectors_reg_legacy));
match(VecS);
format %{ %}
@@ -3675,7 +3702,15 @@
%}
operand vecD() %{
- constraint(ALLOC_IN_RC(vectord_reg));
+ constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
+ match(VecD);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand legVecD() %{
+ constraint(ALLOC_IN_RC(vectord_reg_legacy));
match(VecD);
format %{ %}
@@ -3683,7 +3718,15 @@
%}
operand vecX() %{
- constraint(ALLOC_IN_RC(vectorx_reg));
+ constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
+ match(VecX);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand legVecX() %{
+ constraint(ALLOC_IN_RC(vectorx_reg_legacy));
match(VecX);
format %{ %}
@@ -3691,7 +3734,15 @@
%}
operand vecY() %{
- constraint(ALLOC_IN_RC(vectory_reg));
+ constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
+ match(VecY);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand legVecY() %{
+ constraint(ALLOC_IN_RC(vectory_reg_legacy));
match(VecY);
format %{ %}
@@ -5287,6 +5338,26 @@
ins_pipe(pipe_slow); // XXX
%}
+// Move Float: register-to-register copy from a regF operand into a vlRegF operand
+instruct MoveF2VL(vlRegF dst, regF src) %{
+ match(Set dst src);
+ format %{ "movss $dst,$src\t! load float (4 bytes)" %}
+ ins_encode %{
+ __ movflt($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+// Move Float: register-to-register copy from a vlRegF operand into a regF operand
+instruct MoveVL2F(regF dst, vlRegF src) %{
+ match(Set dst src);
+ format %{ "movss $dst,$src\t! load float (4 bytes)" %}
+ ins_encode %{
+ __ movflt($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
// Load Double
instruct loadD_partial(regD dst, memory mem)
%{
@@ -5314,6 +5385,26 @@
ins_pipe(pipe_slow); // XXX
%}
+// Move Double: register-to-register copy from a regD operand into a vlRegD operand
+instruct MoveD2VL(vlRegD dst, regD src) %{
+ match(Set dst src);
+ format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
+ ins_encode %{
+ __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+// Move Double: register-to-register copy from a vlRegD operand into a regD operand
+instruct MoveVL2D(regD dst, vlRegD src) %{
+ match(Set dst src);
+ format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
+ ins_encode %{
+ __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
// Load Effective Address
instruct leaP8(rRegP dst, indOffset8 mem)
%{
@@ -10858,7 +10949,7 @@
%}
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
- rax_RegI result, regD tmp1, rFlagsReg cr)
+ rax_RegI result, legVecS tmp1, rFlagsReg cr)
%{
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
@@ -10874,7 +10965,7 @@
%}
instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
- rax_RegI result, regD tmp1, rFlagsReg cr)
+ rax_RegI result, legVecS tmp1, rFlagsReg cr)
%{
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
@@ -10890,7 +10981,7 @@
%}
instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
- rax_RegI result, regD tmp1, rFlagsReg cr)
+ rax_RegI result, legVecS tmp1, rFlagsReg cr)
%{
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
@@ -10906,7 +10997,7 @@
%}
instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
- rax_RegI result, regD tmp1, rFlagsReg cr)
+ rax_RegI result, legVecS tmp1, rFlagsReg cr)
%{
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
@@ -10923,7 +11014,7 @@
// fast search of substring with known size.
instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
- rbx_RegI result, regD vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
+ rbx_RegI result, legVecS vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
@@ -10952,7 +11043,7 @@
// fast search of substring with known size.
instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
- rbx_RegI result, regD vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
+ rbx_RegI result, legVecS vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
@@ -10981,7 +11072,7 @@
// fast search of substring with known size.
instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
- rbx_RegI result, regD vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
+ rbx_RegI result, legVecS vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
@@ -11009,7 +11100,7 @@
%}
instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
- rbx_RegI result, regD vec, rcx_RegI tmp, rFlagsReg cr)
+ rbx_RegI result, legVecS vec, rcx_RegI tmp, rFlagsReg cr)
%{
predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
@@ -11026,7 +11117,7 @@
%}
instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
- rbx_RegI result, regD vec, rcx_RegI tmp, rFlagsReg cr)
+ rbx_RegI result, legVecS vec, rcx_RegI tmp, rFlagsReg cr)
%{
predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
@@ -11043,7 +11134,7 @@
%}
instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
- rbx_RegI result, regD vec, rcx_RegI tmp, rFlagsReg cr)
+ rbx_RegI result, legVecS vec, rcx_RegI tmp, rFlagsReg cr)
%{
predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
@@ -11060,7 +11151,7 @@
%}
instruct string_indexofU_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
- rbx_RegI result, regD vec1, regD vec2, regD vec3, rcx_RegI tmp, rFlagsReg cr)
+ rbx_RegI result, legVecS vec1, legVecS vec2, legVecS vec3, rcx_RegI tmp, rFlagsReg cr)
%{
predicate(UseSSE42Intrinsics);
match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
@@ -11075,7 +11166,7 @@
// fast string equals
instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
- regD tmp1, regD tmp2, rbx_RegI tmp3, rFlagsReg cr)
+ legVecS tmp1, legVecS tmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
match(Set result (StrEquals (Binary str1 str2) cnt));
effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
@@ -11091,7 +11182,7 @@
// fast array equals
instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
- regD tmp1, regD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
+ legVecS tmp1, legVecS tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
match(Set result (AryEq ary1 ary2));
@@ -11107,7 +11198,7 @@
%}
instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
- regD tmp1, regD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
+ legVecS tmp1, legVecS tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
match(Set result (AryEq ary1 ary2));
@@ -11123,7 +11214,7 @@
%}
instruct has_negatives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
- regD tmp1, regD tmp2, rbx_RegI tmp3, rFlagsReg cr)
+ legVecS tmp1, legVecS tmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
match(Set result (HasNegatives ary1 len));
effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
@@ -11138,7 +11229,7 @@
%}
// fast char[] to byte[] compression
-instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
+instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legVecS tmp1, legVecS tmp2, legVecS tmp3, legVecS tmp4,
rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
match(Set result (StrCompressedCopy src (Binary dst len)));
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
@@ -11154,7 +11245,7 @@
// fast byte[] to char[] inflation
instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
- regD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
+ legVecS tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
match(Set dummy (StrInflatedCopy src (Binary dst len)));
effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
@@ -11168,7 +11259,7 @@
// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
- regD tmp1, regD tmp2, regD tmp3, regD tmp4,
+ legVecS tmp1, legVecS tmp2, legVecS tmp3, legVecS tmp4,
rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
match(Set result (EncodeISOArray src (Binary dst len)));
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
--- a/src/hotspot/share/c1/c1_LIR.cpp Mon Sep 24 13:51:22 2018 -0700
+++ b/src/hotspot/share/c1/c1_LIR.cpp Mon Sep 24 16:37:28 2018 -0700
@@ -472,7 +472,6 @@
case lir_pop: // input always valid, result and info always invalid
case lir_return: // input always valid, result and info always invalid
case lir_leal: // input and result always valid, info always invalid
- case lir_neg: // input and result always valid, info always invalid
case lir_monaddr: // input and result always valid, info always invalid
case lir_null_check: // input and info always valid, result always invalid
case lir_move: // input and result always valid, may have info
@@ -580,6 +579,7 @@
case lir_rem:
case lir_sqrt:
case lir_abs:
+ case lir_neg:
case lir_logic_and:
case lir_logic_or:
case lir_logic_xor:
@@ -1662,7 +1662,6 @@
case lir_null_check: s = "null_check"; break;
case lir_return: s = "return"; break;
case lir_safepoint: s = "safepoint"; break;
- case lir_neg: s = "neg"; break;
case lir_leal: s = "leal"; break;
case lir_branch: s = "branch"; break;
case lir_cond_float_branch: s = "flt_cond_br"; break;
@@ -1690,6 +1689,7 @@
case lir_div_strictfp: s = "div_strictfp"; break;
case lir_rem: s = "rem"; break;
case lir_abs: s = "abs"; break;
+ case lir_neg: s = "neg"; break;
case lir_sqrt: s = "sqrt"; break;
case lir_logic_and: s = "logic_and"; break;
case lir_logic_or: s = "logic_or"; break;
--- a/src/hotspot/share/c1/c1_LIR.hpp Mon Sep 24 13:51:22 2018 -0700
+++ b/src/hotspot/share/c1/c1_LIR.hpp Mon Sep 24 16:37:28 2018 -0700
@@ -911,7 +911,6 @@
, lir_null_check
, lir_return
, lir_leal
- , lir_neg
, lir_branch
, lir_cond_float_branch
, lir_move
@@ -939,6 +938,7 @@
, lir_rem
, lir_sqrt
, lir_abs
+ , lir_neg
, lir_tan
, lir_log10
, lir_logic_and
@@ -2075,7 +2075,6 @@
void branch_destination(Label* lbl) { append(new LIR_OpLabel(lbl)); }
- void negate(LIR_Opr from, LIR_Opr to) { append(new LIR_Op1(lir_neg, from, to)); }
void leal(LIR_Opr from, LIR_Opr result_reg, LIR_PatchCode patch_code = lir_patch_none, CodeEmitInfo* info = NULL) { append(new LIR_Op1(lir_leal, from, result_reg, T_ILLEGAL, patch_code, info)); }
// result is a stack location for old backend and vreg for UseLinearScan
@@ -2159,6 +2158,7 @@
LIR_Opr t1, LIR_Opr t2, LIR_Opr result = LIR_OprFact::illegalOpr);
void abs (LIR_Opr from, LIR_Opr to, LIR_Opr tmp) { append(new LIR_Op2(lir_abs , from, tmp, to)); }
+ void negate(LIR_Opr from, LIR_Opr to, LIR_Opr tmp = LIR_OprFact::illegalOpr) { append(new LIR_Op2(lir_neg, from, tmp, to)); }
void sqrt(LIR_Opr from, LIR_Opr to, LIR_Opr tmp) { append(new LIR_Op2(lir_sqrt, from, tmp, to)); }
void fmad(LIR_Opr from, LIR_Opr from1, LIR_Opr from2, LIR_Opr to) { append(new LIR_Op3(lir_fmad, from, from1, from2, to)); }
void fmaf(LIR_Opr from, LIR_Opr from1, LIR_Opr from2, LIR_Opr to) { append(new LIR_Op3(lir_fmaf, from, from1, from2, to)); }
--- a/src/hotspot/share/c1/c1_LIRAssembler.cpp Mon Sep 24 13:51:22 2018 -0700
+++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp Mon Sep 24 16:37:28 2018 -0700
@@ -554,10 +554,6 @@
pop(op->in_opr());
break;
- case lir_neg:
- negate(op->in_opr(), op->result_opr());
- break;
-
case lir_leal:
leal(op->in_opr(), op->result_opr(), op->patch_code(), op->info());
break;
@@ -750,6 +746,10 @@
intrinsic_op(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op);
break;
+ case lir_neg:
+ negate(op->in_opr1(), op->result_opr(), op->in_opr2());
+ break;
+
case lir_logic_and:
case lir_logic_or:
case lir_logic_xor:
--- a/src/hotspot/share/c1/c1_LIRAssembler.hpp Mon Sep 24 13:51:22 2018 -0700
+++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp Mon Sep 24 16:37:28 2018 -0700
@@ -239,7 +239,7 @@
void align_backward_branch_target();
void align_call(LIR_Code code);
- void negate(LIR_Opr left, LIR_Opr dest);
+ void negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp = LIR_OprFact::illegalOpr);
void leal(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info);
void rt_call(LIR_Opr result, address dest, const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info);