--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp Wed May 13 21:06:30 2015 +0200
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp Thu May 14 16:16:06 2015 +0200
@@ -54,6 +54,36 @@
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
// Implementation of AddressLiteral
+// A 2-D table for managing compressed displacement(disp8) on EVEX enabled platforms.
+unsigned char tuple_table[Assembler::EVEX_ETUP + 1][Assembler::AVX_512bit + 1] = {
+ // -----------------Table 4.5 -------------------- //
+ 16, 32, 64, // EVEX_FV(0)
+ 4, 4, 4, // EVEX_FV(1) - with Evex.b
+ 16, 32, 64, // EVEX_FV(2) - with Evex.w
+ 8, 8, 8, // EVEX_FV(3) - with Evex.w and Evex.b
+ 8, 16, 32, // EVEX_HV(0)
+ 4, 4, 4, // EVEX_HV(1) - with Evex.b
+ // -----------------Table 4.6 -------------------- //
+ 16, 32, 64, // EVEX_FVM(0)
+ 1, 1, 1, // EVEX_T1S(0)
+ 2, 2, 2, // EVEX_T1S(1)
+ 4, 4, 4, // EVEX_T1S(2)
+ 8, 8, 8, // EVEX_T1S(3)
+ 4, 4, 4, // EVEX_T1F(0)
+ 8, 8, 8, // EVEX_T1F(1)
+ 8, 8, 8, // EVEX_T2(0)
+ 0, 16, 16, // EVEX_T2(1)
+ 0, 16, 16, // EVEX_T4(0)
+ 0, 0, 32, // EVEX_T4(1)
+ 0, 0, 32, // EVEX_T8(0)
+ 8, 16, 32, // EVEX_HVM(0)
+ 4, 8, 16, // EVEX_QVM(0)
+ 2, 4, 8, // EVEX_OVM(0)
+ 16, 16, 16, // EVEX_M128(0)
+ 8, 32, 64, // EVEX_DUP(0)
+ 0, 0, 0 // EVEX_NTUP
+};
+
AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
_is_lval = false;
_target = target;
@@ -183,8 +213,9 @@
// make this go away someday
void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
if (rtype == relocInfo::none)
- emit_int32(data);
- else emit_data(data, Relocation::spec_simple(rtype), format);
+ emit_int32(data);
+ else
+ emit_data(data, Relocation::spec_simple(rtype), format);
}
void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
@@ -273,6 +304,177 @@
}
+bool Assembler::query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,
+ int cur_tuple_type, int in_size_in_bits, int cur_encoding) {
+ int mod_idx = 0;
+ // We will test if the displacement fits the compressed format and if so
+ // apply the compression to the displacment iff the result is8bit.
+ if (VM_Version::supports_evex() && is_evex_inst) {
+ switch (cur_tuple_type) {
+ case EVEX_FV:
+ if ((cur_encoding & VEX_W) == VEX_W) {
+ mod_idx += 2 + ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
+ } else {
+ mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
+ }
+ break;
+
+ case EVEX_HV:
+ mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
+ break;
+
+ case EVEX_FVM:
+ break;
+
+ case EVEX_T1S:
+ switch (in_size_in_bits) {
+ case EVEX_8bit:
+ break;
+
+ case EVEX_16bit:
+ mod_idx = 1;
+ break;
+
+ case EVEX_32bit:
+ mod_idx = 2;
+ break;
+
+ case EVEX_64bit:
+ mod_idx = 3;
+ break;
+ }
+ break;
+
+ case EVEX_T1F:
+ case EVEX_T2:
+ case EVEX_T4:
+ mod_idx = (in_size_in_bits == EVEX_64bit) ? 1 : 0;
+ break;
+
+ case EVEX_T8:
+ break;
+
+ case EVEX_HVM:
+ break;
+
+ case EVEX_QVM:
+ break;
+
+ case EVEX_OVM:
+ break;
+
+ case EVEX_M128:
+ break;
+
+ case EVEX_DUP:
+ break;
+
+ default:
+ assert(0, "no valid evex tuple_table entry");
+ break;
+ }
+
+ if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
+ int disp_factor = tuple_table[cur_tuple_type + mod_idx][vector_len];
+ if ((disp % disp_factor) == 0) {
+ int new_disp = disp / disp_factor;
+ if ((-0x80 <= new_disp && new_disp < 0x80)) {
+ disp = new_disp;
+ }
+ } else {
+ return false;
+ }
+ }
+ }
+ return (-0x80 <= disp && disp < 0x80);
+}
+
+
+bool Assembler::emit_compressed_disp_byte(int &disp) {
+ int mod_idx = 0;
+ // We will test if the displacement fits the compressed format and if so
+ // apply the compression to the displacment iff the result is8bit.
+ if (VM_Version::supports_evex() && is_evex_instruction) {
+ switch (tuple_type) {
+ case EVEX_FV:
+ if ((evex_encoding & VEX_W) == VEX_W) {
+ mod_idx += 2 + ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
+ } else {
+ mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
+ }
+ break;
+
+ case EVEX_HV:
+ mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
+ break;
+
+ case EVEX_FVM:
+ break;
+
+ case EVEX_T1S:
+ switch (input_size_in_bits) {
+ case EVEX_8bit:
+ break;
+
+ case EVEX_16bit:
+ mod_idx = 1;
+ break;
+
+ case EVEX_32bit:
+ mod_idx = 2;
+ break;
+
+ case EVEX_64bit:
+ mod_idx = 3;
+ break;
+ }
+ break;
+
+ case EVEX_T1F:
+ case EVEX_T2:
+ case EVEX_T4:
+ mod_idx = (input_size_in_bits == EVEX_64bit) ? 1 : 0;
+ break;
+
+ case EVEX_T8:
+ break;
+
+ case EVEX_HVM:
+ break;
+
+ case EVEX_QVM:
+ break;
+
+ case EVEX_OVM:
+ break;
+
+ case EVEX_M128:
+ break;
+
+ case EVEX_DUP:
+ break;
+
+ default:
+ assert(0, "no valid evex tuple_table entry");
+ break;
+ }
+
+ if (avx_vector_len >= AVX_128bit && avx_vector_len <= AVX_512bit) {
+ int disp_factor = tuple_table[tuple_type + mod_idx][avx_vector_len];
+ if ((disp % disp_factor) == 0) {
+ int new_disp = disp / disp_factor;
+ if (is8bit(new_disp)) {
+ disp = new_disp;
+ }
+ } else {
+ return false;
+ }
+ }
+ }
+ return is8bit(disp);
+}
+
+
void Assembler::emit_operand(Register reg, Register base, Register index,
Address::ScaleFactor scale, int disp,
RelocationHolder const& rspec,
@@ -296,7 +498,7 @@
assert(index != rsp, "illegal addressing mode");
emit_int8(0x04 | regenc);
emit_int8(scale << 6 | indexenc | baseenc);
- } else if (is8bit(disp) && rtype == relocInfo::none) {
+ } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
// [base + index*scale + imm8]
// [01 reg 100][ss index base] imm8
assert(index != rsp, "illegal addressing mode");
@@ -318,7 +520,7 @@
// [00 reg 100][00 100 100]
emit_int8(0x04 | regenc);
emit_int8(0x24);
- } else if (is8bit(disp) && rtype == relocInfo::none) {
+ } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
// [rsp + imm8]
// [01 reg 100][00 100 100] disp8
emit_int8(0x44 | regenc);
@@ -339,7 +541,7 @@
// [base]
// [00 reg base]
emit_int8(0x00 | regenc | baseenc);
- } else if (is8bit(disp) && rtype == relocInfo::none) {
+ } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
// [base + disp8]
// [01 reg base] disp8
emit_int8(0x40 | regenc | baseenc);
@@ -389,11 +591,20 @@
emit_data(disp, rspec, disp32_operand);
}
}
+ is_evex_instruction = false;
}
void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
Address::ScaleFactor scale, int disp,
RelocationHolder const& rspec) {
+ if (UseAVX > 2) {
+ int xreg_enc = reg->encoding();
+ if (xreg_enc > 15) {
+ XMMRegister new_reg = as_XMMRegister(xreg_enc & 0xf);
+ emit_operand((Register)new_reg, base, index, scale, disp, rspec);
+ return;
+ }
+ }
emit_operand((Register)reg, base, index, scale, disp, rspec);
}
@@ -686,6 +897,29 @@
debug_only(has_disp32 = true); // has both kinds of operands!
break;
+ case 0x62: // EVEX_4bytes
+ assert((UseAVX > 0), "shouldn't have EVEX prefix");
+ assert(ip == inst+1, "no prefixes allowed");
+ // no EVEX collisions, all instructions that have 0x62 opcodes
+ // have EVEX versions and are subopcodes of 0x66
+ ip++; // skip P0 and exmaine W in P1
+ is_64bit = ((VEX_W & *ip) == VEX_W);
+ ip++; // move to P2
+ ip++; // skip P2, move to opcode
+ // To find the end of instruction (which == end_pc_operand).
+ switch (0xFF & *ip) {
+ case 0x61: // pcmpestri r, r/a, #8
+ case 0x70: // pshufd r, r/a, #8
+ case 0x73: // psrldq r, #8
+ tail_size = 1; // the imm8
+ break;
+ default:
+ break;
+ }
+ ip++; // skip opcode
+ debug_only(has_disp32 = true); // has both kinds of operands!
+ break;
+
case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
@@ -985,12 +1219,22 @@
void Assembler::addsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
+ if (VM_Version::supports_evex()) {
+ emit_simd_arith_q(0x58, dst, src, VEX_SIMD_F2);
+ } else {
+ emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
+ }
}
void Assembler::addsd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_64bit;
+ emit_simd_arith_q(0x58, dst, src, VEX_SIMD_F2);
+ } else {
+ emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
+ }
}
void Assembler::addss(XMMRegister dst, XMMRegister src) {
@@ -1000,20 +1244,26 @@
void Assembler::addss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_32bit;
+ }
emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
}
void Assembler::aesdec(XMMRegister dst, Address src) {
assert(VM_Version::supports_aes(), "");
InstructionMark im(this);
- simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+ simd_prefix(dst, dst, src, VEX_SIMD_66, false,
+ VEX_OPCODE_0F_38, false, AVX_128bit, true);
emit_int8((unsigned char)0xDE);
emit_operand(dst, src);
}
void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_aes(), "");
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false,
+ VEX_OPCODE_0F_38, false, AVX_128bit, true);
emit_int8((unsigned char)0xDE);
emit_int8(0xC0 | encode);
}
@@ -1021,14 +1271,16 @@
void Assembler::aesdeclast(XMMRegister dst, Address src) {
assert(VM_Version::supports_aes(), "");
InstructionMark im(this);
- simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+ simd_prefix(dst, dst, src, VEX_SIMD_66, false,
+ VEX_OPCODE_0F_38, false, AVX_128bit, true);
emit_int8((unsigned char)0xDF);
emit_operand(dst, src);
}
void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_aes(), "");
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false,
+ VEX_OPCODE_0F_38, false, AVX_128bit, true);
emit_int8((unsigned char)0xDF);
emit_int8((unsigned char)(0xC0 | encode));
}
@@ -1036,14 +1288,16 @@
void Assembler::aesenc(XMMRegister dst, Address src) {
assert(VM_Version::supports_aes(), "");
InstructionMark im(this);
- simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+ simd_prefix(dst, dst, src, VEX_SIMD_66, false,
+ VEX_OPCODE_0F_38, false, AVX_128bit, true);
emit_int8((unsigned char)0xDC);
emit_operand(dst, src);
}
void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_aes(), "");
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false,
+ VEX_OPCODE_0F_38, false, AVX_128bit, true);
emit_int8((unsigned char)0xDC);
emit_int8(0xC0 | encode);
}
@@ -1051,14 +1305,16 @@
void Assembler::aesenclast(XMMRegister dst, Address src) {
assert(VM_Version::supports_aes(), "");
InstructionMark im(this);
- simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+ simd_prefix(dst, dst, src, VEX_SIMD_66, false,
+ VEX_OPCODE_0F_38, false, AVX_128bit, true);
emit_int8((unsigned char)0xDD);
emit_operand(dst, src);
}
void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_aes(), "");
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false,
+ VEX_OPCODE_0F_38, false, AVX_128bit, true);
emit_int8((unsigned char)0xDD);
emit_int8((unsigned char)(0xC0 | encode));
}
@@ -1091,7 +1347,7 @@
void Assembler::andnl(Register dst, Register src1, Register src2) {
assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
- int encode = vex_prefix_0F38_and_encode(dst, src1, src2);
+ int encode = vex_prefix_0F38_and_encode(dst, src1, src2, false);
emit_int8((unsigned char)0xF2);
emit_int8((unsigned char)(0xC0 | encode));
}
@@ -1099,7 +1355,7 @@
void Assembler::andnl(Register dst, Register src1, Address src2) {
InstructionMark im(this);
assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
- vex_prefix_0F38(dst, src1, src2);
+ vex_prefix_0F38(dst, src1, src2, false);
emit_int8((unsigned char)0xF2);
emit_operand(dst, src2);
}
@@ -1126,7 +1382,7 @@
void Assembler::blsil(Register dst, Register src) {
assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
- int encode = vex_prefix_0F38_and_encode(rbx, dst, src);
+ int encode = vex_prefix_0F38_and_encode(rbx, dst, src, false);
emit_int8((unsigned char)0xF3);
emit_int8((unsigned char)(0xC0 | encode));
}
@@ -1134,14 +1390,14 @@
void Assembler::blsil(Register dst, Address src) {
InstructionMark im(this);
assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
- vex_prefix_0F38(rbx, dst, src);
+ vex_prefix_0F38(rbx, dst, src, false);
emit_int8((unsigned char)0xF3);
emit_operand(rbx, src);
}
void Assembler::blsmskl(Register dst, Register src) {
assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
- int encode = vex_prefix_0F38_and_encode(rdx, dst, src);
+ int encode = vex_prefix_0F38_and_encode(rdx, dst, src, false);
emit_int8((unsigned char)0xF3);
emit_int8((unsigned char)(0xC0 | encode));
}
@@ -1149,14 +1405,14 @@
void Assembler::blsmskl(Register dst, Address src) {
InstructionMark im(this);
assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
- vex_prefix_0F38(rdx, dst, src);
+ vex_prefix_0F38(rdx, dst, src, false);
emit_int8((unsigned char)0xF3);
emit_operand(rdx, src);
}
void Assembler::blsrl(Register dst, Register src) {
assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
- int encode = vex_prefix_0F38_and_encode(rcx, dst, src);
+ int encode = vex_prefix_0F38_and_encode(rcx, dst, src, false);
emit_int8((unsigned char)0xF3);
emit_int8((unsigned char)(0xC0 | encode));
}
@@ -1164,7 +1420,7 @@
void Assembler::blsrl(Register dst, Address src) {
InstructionMark im(this);
assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
- vex_prefix_0F38(rcx, dst, src);
+ vex_prefix_0F38(rcx, dst, src, false);
emit_int8((unsigned char)0xF3);
emit_operand(rcx, src);
}
@@ -1312,22 +1568,36 @@
// NOTE: dbx seems to decode this as comiss even though the
// 0x66 is there. Strangly ucomisd comes out correct
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_64bit;
+ emit_simd_arith_nonds_q(0x2F, dst, src, VEX_SIMD_66, true);
+ } else {
+ emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
+ }
}
void Assembler::comisd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
+ if (VM_Version::supports_evex()) {
+ emit_simd_arith_nonds_q(0x2F, dst, src, VEX_SIMD_66, true);
+ } else {
+ emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
+ }
}
void Assembler::comiss(XMMRegister dst, Address src) {
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_32bit;
+ }
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
+ emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE, true);
}
void Assembler::comiss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
+ emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE, true);
}
void Assembler::cpuid() {
@@ -1347,36 +1617,61 @@
void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
+ if (VM_Version::supports_evex()) {
+ emit_simd_arith_q(0x5A, dst, src, VEX_SIMD_F2);
+ } else {
+ emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
+ }
}
void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1F;
+ input_size_in_bits = EVEX_64bit;
+ emit_simd_arith_q(0x5A, dst, src, VEX_SIMD_F2);
+ } else {
+ emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
+ }
}
void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
+ int encode = 0;
+ if (VM_Version::supports_evex()) {
+ encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2, true);
+ } else {
+ encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, false);
+ }
emit_int8(0x2A);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_32bit;
+ emit_simd_arith_q(0x2A, dst, src, VEX_SIMD_F2, true);
+ } else {
+ emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2);
+ }
}
void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, true);
emit_int8(0x2A);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_32bit;
+ }
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3);
+ emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3, true);
}
void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
@@ -1385,6 +1680,10 @@
}
void Assembler::cvtss2sd(XMMRegister dst, Address src) {
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_32bit;
+ }
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
}
@@ -1392,14 +1691,14 @@
void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);
+ int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, true);
emit_int8(0x2C);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::cvttss2sil(Register dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
+ int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, true);
emit_int8(0x2C);
emit_int8((unsigned char)(0xC0 | encode));
}
@@ -1414,15 +1713,29 @@
void Assembler::divsd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_64bit;
+ emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_F2);
+ } else {
+ emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
+ }
}
void Assembler::divsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
+ if (VM_Version::supports_evex()) {
+ emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_F2);
+ } else {
+ emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
+ }
}
void Assembler::divss(XMMRegister dst, Address src) {
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_32bit;
+ }
NOT_LP64(assert(VM_Version::supports_sse(), ""));
emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
}
@@ -1675,7 +1988,11 @@
void Assembler::movapd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66);
+ if (VM_Version::supports_evex()) {
+ emit_simd_arith_nonds_q(0x28, dst, src, VEX_SIMD_66, true);
+ } else {
+ emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66);
+ }
}
void Assembler::movaps(XMMRegister dst, XMMRegister src) {
@@ -1685,7 +2002,8 @@
void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE);
+ int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, true, VEX_OPCODE_0F,
+ false, AVX_128bit);
emit_int8(0x16);
emit_int8((unsigned char)(0xC0 | encode));
}
@@ -1698,6 +2016,51 @@
emit_operand(dst, src);
}
+void Assembler::kmovq(KRegister dst, KRegister src) {
+ NOT_LP64(assert(VM_Version::supports_evex(), ""));
+ int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE,
+ true, VEX_OPCODE_0F, true);
+ emit_int8((unsigned char)0x90);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::kmovq(KRegister dst, Address src) {
+ NOT_LP64(assert(VM_Version::supports_evex(), ""));
+ int dst_enc = dst->encoding();
+ int nds_enc = 0;
+ vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_NONE,
+ VEX_OPCODE_0F, true, AVX_128bit, true, true);
+ emit_int8((unsigned char)0x90);
+ emit_operand((Register)dst, src);
+}
+
+void Assembler::kmovq(Address dst, KRegister src) {
+ NOT_LP64(assert(VM_Version::supports_evex(), ""));
+ int src_enc = src->encoding();
+ int nds_enc = 0;
+ vex_prefix(dst, nds_enc, src_enc, VEX_SIMD_NONE,
+ VEX_OPCODE_0F, true, AVX_128bit, true, true);
+ emit_int8((unsigned char)0x90);
+ emit_operand((Register)src, dst);
+}
+
+void Assembler::kmovql(KRegister dst, Register src) {
+ NOT_LP64(assert(VM_Version::supports_evex(), ""));
+ bool supports_bw = VM_Version::supports_avx512bw();
+ VexSimdPrefix pre = supports_bw ? VEX_SIMD_F2 : VEX_SIMD_NONE;
+ int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, true,
+ VEX_OPCODE_0F, supports_bw);
+ emit_int8((unsigned char)0x92);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::kmovdl(KRegister dst, Register src) {
+ NOT_LP64(assert(VM_Version::supports_evex(), ""));
+ VexSimdPrefix pre = VM_Version::supports_avx512bw() ? VEX_SIMD_F2 : VEX_SIMD_NONE;
+ int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, true, VEX_OPCODE_0F, false);
+ emit_int8((unsigned char)0x92);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
void Assembler::movb(Address dst, int imm8) {
InstructionMark im(this);
@@ -1718,7 +2081,7 @@
void Assembler::movdl(XMMRegister dst, Register src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
+ int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, true);
emit_int8(0x6E);
emit_int8((unsigned char)(0xC0 | encode));
}
@@ -1726,23 +2089,31 @@
void Assembler::movdl(Register dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
// swap src/dst to get correct prefix
- int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66);
+ int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66, true);
emit_int8(0x7E);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::movdl(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionMark im(this);
- simd_prefix(dst, src, VEX_SIMD_66);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_32bit;
+ }
+ InstructionMark im(this);
+ simd_prefix(dst, src, VEX_SIMD_66, true, VEX_OPCODE_0F);
emit_int8(0x6E);
emit_operand(dst, src);
}
void Assembler::movdl(Address dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionMark im(this);
- simd_prefix(dst, src, VEX_SIMD_66);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_32bit;
+ }
+ InstructionMark im(this);
+ simd_prefix(dst, src, VEX_SIMD_66, true);
emit_int8(0x7E);
emit_operand(src, dst);
}
@@ -1754,11 +2125,17 @@
void Assembler::movdqa(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FVM;
+ }
emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
}
void Assembler::movdqu(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FVM;
+ }
emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
}
@@ -1769,8 +2146,11 @@
void Assembler::movdqu(Address dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionMark im(this);
- simd_prefix(dst, src, VEX_SIMD_F3);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FVM;
+ }
+ InstructionMark im(this);
+ simd_prefix(dst, src, VEX_SIMD_F3, false);
emit_int8(0x7F);
emit_operand(src, dst);
}
@@ -1778,28 +2158,77 @@
// Move Unaligned 256bit Vector
void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
assert(UseAVX > 0, "");
- bool vector256 = true;
- int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector256);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FVM;
+ }
+ int vector_len = AVX_256bit;
+ int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector_len);
emit_int8(0x6F);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::vmovdqu(XMMRegister dst, Address src) {
assert(UseAVX > 0, "");
- InstructionMark im(this);
- bool vector256 = true;
- vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector256);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FVM;
+ }
+ InstructionMark im(this);
+ int vector_len = AVX_256bit;
+ vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector_len, false);
emit_int8(0x6F);
emit_operand(dst, src);
}
void Assembler::vmovdqu(Address dst, XMMRegister src) {
assert(UseAVX > 0, "");
- InstructionMark im(this);
- bool vector256 = true;
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FVM;
+ }
+ InstructionMark im(this);
+ int vector_len = AVX_256bit;
// swap src<->dst for encoding
assert(src != xnoreg, "sanity");
- vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector256);
+ vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector_len, false);
+ emit_int8(0x7F);
+ emit_operand(src, dst);
+}
+
+// Move Unaligned EVEX enabled Vector (programmable : 8,16,32,64)
+void Assembler::evmovdqu(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(UseAVX > 0, "");
+ int src_enc = src->encoding();
+ int dst_enc = dst->encoding();
+ int encode = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_F3, VEX_OPCODE_0F,
+ true, vector_len, false, false);
+ emit_int8(0x6F);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::evmovdqu(XMMRegister dst, Address src, int vector_len) {
+ assert(UseAVX > 0, "");
+ InstructionMark im(this);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FVM;
+ vex_prefix_q(dst, xnoreg, src, VEX_SIMD_F3, vector_len, false);
+ } else {
+ vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector_len, false);
+ }
+ emit_int8(0x6F);
+ emit_operand(dst, src);
+}
+
+void Assembler::evmovdqu(Address dst, XMMRegister src, int vector_len) {
+ assert(UseAVX > 0, "");
+ InstructionMark im(this);
+ assert(src != xnoreg, "sanity");
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FVM;
+ // swap src<->dst for encoding
+ vex_prefix_q(src, xnoreg, dst, VEX_SIMD_F3, vector_len, false);
+ } else {
+ // swap src<->dst for encoding
+ vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector_len, false);
+ }
emit_int8(0x7F);
emit_operand(src, dst);
}
@@ -1845,7 +2274,11 @@
// The selection is done in MacroAssembler::movdbl() and movflt().
void Assembler::movlpd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith(0x12, dst, src, VEX_SIMD_66);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_32bit;
+ }
+ emit_simd_arith(0x12, dst, src, VEX_SIMD_66, true);
}
void Assembler::movq( MMXRegister dst, Address src ) {
@@ -1871,7 +2304,13 @@
void Assembler::movq(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
- simd_prefix(dst, src, VEX_SIMD_F3);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_64bit;
+ simd_prefix_q(dst, xnoreg, src, VEX_SIMD_F3, true);
+ } else {
+ simd_prefix(dst, src, VEX_SIMD_F3, true, VEX_OPCODE_0F);
+ }
emit_int8(0x7E);
emit_operand(dst, src);
}
@@ -1879,7 +2318,14 @@
void Assembler::movq(Address dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
- simd_prefix(dst, src, VEX_SIMD_66);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_64bit;
+ simd_prefix(src, xnoreg, dst, VEX_SIMD_66, true,
+ VEX_OPCODE_0F, true, AVX_128bit);
+ } else {
+ simd_prefix(dst, src, VEX_SIMD_66, true);
+ }
emit_int8((unsigned char)0xD6);
emit_operand(src, dst);
}
@@ -1902,36 +2348,60 @@
void Assembler::movsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith(0x10, dst, src, VEX_SIMD_F2);
+ if (VM_Version::supports_evex()) {
+ emit_simd_arith_q(0x10, dst, src, VEX_SIMD_F2, true);
+ } else {
+ emit_simd_arith(0x10, dst, src, VEX_SIMD_F2);
+ }
}
void Assembler::movsd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_64bit;
+ emit_simd_arith_nonds_q(0x10, dst, src, VEX_SIMD_F2, true);
+ } else {
+ emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2);
+ }
}
void Assembler::movsd(Address dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
- simd_prefix(dst, src, VEX_SIMD_F2);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_64bit;
+ simd_prefix_q(src, xnoreg, dst, VEX_SIMD_F2);
+ } else {
+ simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, false);
+ }
emit_int8(0x11);
emit_operand(src, dst);
}
void Assembler::movss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- emit_simd_arith(0x10, dst, src, VEX_SIMD_F3);
+ emit_simd_arith(0x10, dst, src, VEX_SIMD_F3, true);
}
void Assembler::movss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_32bit;
+ }
+ emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3, true);
}
void Assembler::movss(Address dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- InstructionMark im(this);
- simd_prefix(dst, src, VEX_SIMD_F3);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_32bit;
+ }
+ InstructionMark im(this);
+ simd_prefix(dst, src, VEX_SIMD_F3, false);
emit_int8(0x11);
emit_operand(src, dst);
}
@@ -2023,16 +2493,30 @@
void Assembler::mulsd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_64bit;
+ emit_simd_arith_q(0x59, dst, src, VEX_SIMD_F2);
+ } else {
+ emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
+ }
}
void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
+ if (VM_Version::supports_evex()) {
+ emit_simd_arith_q(0x59, dst, src, VEX_SIMD_F2);
+ } else {
+ emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
+ }
}
void Assembler::mulss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_32bit;
+ }
emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
}
@@ -2332,22 +2816,30 @@
void Assembler::packuswb(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
- emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FV;
+ input_size_in_bits = EVEX_32bit;
+ }
+ emit_simd_arith(0x67, dst, src, VEX_SIMD_66,
+ false, (VM_Version::supports_avx512dq() == false));
}
void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
-}
-
-void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- emit_vex_arith(0x67, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, bool vector256) {
+ emit_simd_arith(0x67, dst, src, VEX_SIMD_66,
+ false, (VM_Version::supports_avx512dq() == false));
+}
+
+void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(UseAVX > 0, "some form of AVX must be enabled");
+ emit_vex_arith(0x67, dst, nds, src, VEX_SIMD_66, vector_len,
+ false, (VM_Version::supports_avx512dq() == false));
+}
+
+void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
assert(VM_Version::supports_avx2(), "");
- int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, true, vector256);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, false,
+ VEX_OPCODE_0F_3A, true, vector_len);
emit_int8(0x00);
emit_int8(0xC0 | encode);
emit_int8(imm8);
@@ -2361,7 +2853,8 @@
void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
assert(VM_Version::supports_sse4_2(), "");
InstructionMark im(this);
- simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
+ simd_prefix(dst, xnoreg, src, VEX_SIMD_66, false, VEX_OPCODE_0F_3A,
+ false, AVX_128bit, true);
emit_int8(0x61);
emit_operand(dst, src);
emit_int8(imm8);
@@ -2369,7 +2862,8 @@
void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sse4_2(), "");
- int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, false,
+ VEX_OPCODE_0F_3A, false, AVX_128bit, true);
emit_int8(0x61);
emit_int8((unsigned char)(0xC0 | encode));
emit_int8(imm8);
@@ -2377,7 +2871,8 @@
void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, false);
+ int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, true, VEX_OPCODE_0F_3A,
+ false, AVX_128bit, (VM_Version::supports_avx512dq() == false));
emit_int8(0x16);
emit_int8((unsigned char)(0xC0 | encode));
emit_int8(imm8);
@@ -2385,7 +2880,8 @@
void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, true);
+ int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, true, VEX_OPCODE_0F_3A,
+ false, AVX_128bit, (VM_Version::supports_avx512dq() == false));
emit_int8(0x16);
emit_int8((unsigned char)(0xC0 | encode));
emit_int8(imm8);
@@ -2393,7 +2889,8 @@
void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, false);
+ int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, true, VEX_OPCODE_0F_3A,
+ false, AVX_128bit, (VM_Version::supports_avx512dq() == false));
emit_int8(0x22);
emit_int8((unsigned char)(0xC0 | encode));
emit_int8(imm8);
@@ -2401,7 +2898,8 @@
void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, true);
+ int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, true, VEX_OPCODE_0F_3A,
+ false, AVX_128bit, (VM_Version::supports_avx512dq() == false));
emit_int8(0x22);
emit_int8((unsigned char)(0xC0 | encode));
emit_int8(imm8);
@@ -2409,15 +2907,18 @@
void Assembler::pmovzxbw(XMMRegister dst, Address src) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionMark im(this);
- simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_HVM;
+ }
+ InstructionMark im(this);
+ simd_prefix(dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38);
emit_int8(0x30);
emit_operand(dst, src);
}
void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_sse4_1(), "");
- int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38);
emit_int8(0x30);
emit_int8((unsigned char)(0xC0 | encode));
}
@@ -2520,15 +3021,20 @@
void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_ssse3(), "");
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38,
+ false, AVX_128bit, (VM_Version::supports_avx512bw() == false));
emit_int8(0x00);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::pshufb(XMMRegister dst, Address src) {
assert(VM_Version::supports_ssse3(), "");
- InstructionMark im(this);
- simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FVM;
+ }
+ InstructionMark im(this);
+ simd_prefix(dst, dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38,
+ false, AVX_128bit, (VM_Version::supports_avx512bw() == false));
emit_int8(0x00);
emit_operand(dst, src);
}
@@ -2545,8 +3051,12 @@
assert(isByte(mode), "invalid value");
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
- InstructionMark im(this);
- simd_prefix(dst, src, VEX_SIMD_66);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FV;
+ input_size_in_bits = EVEX_32bit;
+ }
+ InstructionMark im(this);
+ simd_prefix(dst, src, VEX_SIMD_66, false);
emit_int8(0x70);
emit_operand(dst, src);
emit_int8(mode & 0xFF);
@@ -2555,7 +3065,8 @@
void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
assert(isByte(mode), "invalid value");
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2);
+ emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2, false,
+ (VM_Version::supports_avx512bw() == false));
emit_int8(mode & 0xFF);
}
@@ -2563,8 +3074,12 @@
assert(isByte(mode), "invalid value");
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
- InstructionMark im(this);
- simd_prefix(dst, src, VEX_SIMD_F2);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FVM;
+ }
+ InstructionMark im(this);
+ simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, false, VEX_OPCODE_0F,
+ false, AVX_128bit, (VM_Version::supports_avx512bw() == false));
emit_int8(0x70);
emit_operand(dst, src);
emit_int8(mode & 0xFF);
@@ -2573,7 +3088,8 @@
void Assembler::psrldq(XMMRegister dst, int shift) {
// Shift 128 bit value in xmm register by number of bytes.
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66);
+ int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, true, VEX_OPCODE_0F,
+ false, AVX_128bit, (VM_Version::supports_avx512bw() == false));
emit_int8(0x73);
emit_int8((unsigned char)(0xC0 | encode));
emit_int8(shift);
@@ -2583,14 +3099,15 @@
assert(VM_Version::supports_sse4_1(), "");
assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
InstructionMark im(this);
- simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+ simd_prefix(dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38);
emit_int8(0x17);
emit_operand(dst, src);
}
void Assembler::ptest(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_sse4_1(), "");
- int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66,
+ false, VEX_OPCODE_0F_38);
emit_int8(0x17);
emit_int8((unsigned char)(0xC0 | encode));
}
@@ -2598,19 +3115,20 @@
void Assembler::vptest(XMMRegister dst, Address src) {
assert(VM_Version::supports_avx(), "");
InstructionMark im(this);
- bool vector256 = true;
+ int vector_len = AVX_256bit;
assert(dst != xnoreg, "sanity");
int dst_enc = dst->encoding();
// swap src<->dst for encoding
- vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256);
+ vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector_len);
emit_int8(0x17);
emit_operand(dst, src);
}
void Assembler::vptest(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
- bool vector256 = true;
- int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
+ int vector_len = AVX_256bit;
+ int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66,
+ vector_len, VEX_OPCODE_0F_38);
emit_int8(0x17);
emit_int8((unsigned char)(0xC0 | encode));
}
@@ -2618,6 +3136,9 @@
void Assembler::punpcklbw(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FVM;
+ }
emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
}
@@ -2629,6 +3150,10 @@
void Assembler::punpckldq(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FV;
+ input_size_in_bits = EVEX_32bit;
+ }
emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
}
@@ -2838,12 +3363,22 @@
void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
+ if (VM_Version::supports_evex()) {
+ emit_simd_arith_q(0x51, dst, src, VEX_SIMD_F2);
+ } else {
+ emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
+ }
}
void Assembler::sqrtsd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_64bit;
+ emit_simd_arith_q(0x51, dst, src, VEX_SIMD_F2);
+ } else {
+ emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
+ }
}
void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
@@ -2857,6 +3392,10 @@
void Assembler::sqrtss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_32bit;
+ }
emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
}
@@ -2907,12 +3446,20 @@
void Assembler::subsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
+ if (VM_Version::supports_evex()) {
+ emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_F2);
+ } else {
+ emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
+ }
}
void Assembler::subsd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_64bit;
+ }
+ emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_F2);
}
void Assembler::subss(XMMRegister dst, XMMRegister src) {
@@ -2922,6 +3469,10 @@
void Assembler::subss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_32bit;
+ }
emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
}
@@ -2978,22 +3529,36 @@
void Assembler::ucomisd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_64bit;
+ emit_simd_arith_nonds_q(0x2E, dst, src, VEX_SIMD_66, true);
+ } else {
+ emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
+ }
}
void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
+ if (VM_Version::supports_evex()) {
+ emit_simd_arith_nonds_q(0x2E, dst, src, VEX_SIMD_66, true);
+ } else {
+ emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
+ }
}
void Assembler::ucomiss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_32bit;
+ }
+ emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE, true);
}
void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
+ emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE, true);
}
void Assembler::xabort(int8_t imm8) {
@@ -3075,82 +3640,138 @@
void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_64bit;
+ emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
+ } else {
+ emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
+ }
}
void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
+ if (VM_Version::supports_evex()) {
+ emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
+ } else {
+ emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
+ }
}
void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_32bit;
+ }
+ emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
}
void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+ emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
}
void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_64bit;
+ emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
+ } else {
+ emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
+ }
}
void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
+ if (VM_Version::supports_evex()) {
+ emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
+ } else {
+ emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
+ }
}
void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_32bit;
+ }
+ emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
}
void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+ emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
}
void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_64bit;
+ emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
+ } else {
+ emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
+ }
}
void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
+ if (VM_Version::supports_evex()) {
+ emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
+ } else {
+ emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
+ }
}
void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_32bit;
+ }
+ emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
}
void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+ emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
}
void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_64bit;
+ emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
+ } else {
+ emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
+ }
}
void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
+ if (VM_Version::supports_evex()) {
+ emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
+ } else {
+ emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
+ }
}
void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_32bit;
+ }
+ emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
}
void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+ emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
}
//====================VECTOR ARITHMETIC=====================================
@@ -3159,7 +3780,11 @@
void Assembler::addpd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith(0x58, dst, src, VEX_SIMD_66);
+ if (VM_Version::supports_evex()) {
+ emit_simd_arith_q(0x58, dst, src, VEX_SIMD_66);
+ } else {
+ emit_simd_arith(0x58, dst, src, VEX_SIMD_66);
+ }
}
void Assembler::addps(XMMRegister dst, XMMRegister src) {
@@ -3167,29 +3792,47 @@
emit_simd_arith(0x58, dst, src, VEX_SIMD_NONE);
}
-void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ if (VM_Version::supports_evex()) {
+ emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
+ } else {
+ emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
+ }
+}
+
+void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(VM_Version::supports_avx(), "");
+ emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector_len);
+}
+
+void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256);
-}
-
-void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FV;
+ input_size_in_bits = EVEX_64bit;
+ emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
+ } else {
+ emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
+ }
+}
+
+void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
- assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FV;
+ input_size_in_bits = EVEX_32bit;
+ }
+ emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector_len);
}
void Assembler::subpd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith(0x5C, dst, src, VEX_SIMD_66);
+ if (VM_Version::supports_evex()) {
+ emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_66);
+ } else {
+ emit_simd_arith(0x5C, dst, src, VEX_SIMD_66);
+ }
}
void Assembler::subps(XMMRegister dst, XMMRegister src) {
@@ -3197,29 +3840,47 @@
emit_simd_arith(0x5C, dst, src, VEX_SIMD_NONE);
}
-void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ if (VM_Version::supports_evex()) {
+ emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
+ } else {
+ emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
+ }
+}
+
+void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(VM_Version::supports_avx(), "");
+ emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector_len);
+}
+
+void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256);
-}
-
-void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FV;
+ input_size_in_bits = EVEX_64bit;
+ emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
+ } else {
+ emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
+ }
+}
+
+void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
- assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FV;
+ input_size_in_bits = EVEX_32bit;
+ }
+ emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector_len);
}
void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
+ if (VM_Version::supports_evex()) {
+ emit_simd_arith_q(0x59, dst, src, VEX_SIMD_66);
+ } else {
+ emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
+ }
}
void Assembler::mulps(XMMRegister dst, XMMRegister src) {
@@ -3227,29 +3888,47 @@
emit_simd_arith(0x59, dst, src, VEX_SIMD_NONE);
}
-void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ if (VM_Version::supports_evex()) {
+ emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
+ } else {
+ emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
+ }
+}
+
+void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(VM_Version::supports_avx(), "");
+ emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector_len);
+}
+
+void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256);
-}
-
-void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FV;
+ input_size_in_bits = EVEX_64bit;
+ emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
+ } else {
+ emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
+ }
+}
+
+void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
- assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FV;
+ input_size_in_bits = EVEX_32bit;
+ }
+ emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector_len);
}
void Assembler::divpd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith(0x5E, dst, src, VEX_SIMD_66);
+ if (VM_Version::supports_evex()) {
+ emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_66);
+ } else {
+ emit_simd_arith(0x5E, dst, src, VEX_SIMD_66);
+ }
}
void Assembler::divps(XMMRegister dst, XMMRegister src) {
@@ -3257,118 +3936,199 @@
emit_simd_arith(0x5E, dst, src, VEX_SIMD_NONE);
}
-void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ if (VM_Version::supports_evex()) {
+ emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
+ } else {
+ emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
+ }
+}
+
+void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(VM_Version::supports_avx(), "");
+ emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector_len);
+}
+
+void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256);
-}
-
-void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FV;
+ input_size_in_bits = EVEX_64bit;
+ emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
+ } else {
+ emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
+ }
+}
+
+void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
- assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FV;
+ input_size_in_bits = EVEX_32bit;
+ }
+ emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector_len);
}
void Assembler::andpd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith(0x54, dst, src, VEX_SIMD_66);
+ if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) {
+ emit_simd_arith_q(0x54, dst, src, VEX_SIMD_66);
+ } else {
+ emit_simd_arith(0x54, dst, src, VEX_SIMD_66, false, true);
+ }
}
void Assembler::andps(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE);
+ emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE, false,
+ (VM_Version::supports_avx512dq() == false));
}
void Assembler::andps(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FV;
+ input_size_in_bits = EVEX_32bit;
+ }
+ emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE,
+ false, (VM_Version::supports_avx512dq() == false));
}
void Assembler::andpd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith(0x54, dst, src, VEX_SIMD_66);
-}
-
-void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) {
+ tuple_type = EVEX_FV;
+ input_size_in_bits = EVEX_64bit;
+ emit_simd_arith_q(0x54, dst, src, VEX_SIMD_66);
+ } else {
+ emit_simd_arith(0x54, dst, src, VEX_SIMD_66, false, true);
+ }
+}
+
+void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) {
+ emit_vex_arith_q(0x54, dst, nds, src, VEX_SIMD_66, vector_len);
+ } else {
+ emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector_len, true);
+ }
+}
+
+void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256);
-}
-
-void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ bool legacy_mode = (VM_Version::supports_avx512dq() == false);
+ emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len, legacy_mode);
+}
+
+void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) {
+ tuple_type = EVEX_FV;
+ input_size_in_bits = EVEX_64bit;
+ emit_vex_arith_q(0x54, dst, nds, src, VEX_SIMD_66, vector_len);
+ } else {
+ emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector_len, true);
+ }
+}
+
+void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FV;
+ input_size_in_bits = EVEX_32bit;
+ }
+ emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len,
+ (VM_Version::supports_avx512dq() == false));
}
void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith(0x57, dst, src, VEX_SIMD_66);
+ if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) {
+ emit_simd_arith_q(0x57, dst, src, VEX_SIMD_66);
+ } else {
+ emit_simd_arith(0x57, dst, src, VEX_SIMD_66, false, true);
+ }
}
void Assembler::xorps(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE);
+ emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE,
+ false, (VM_Version::supports_avx512dq() == false));
}
void Assembler::xorpd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith(0x57, dst, src, VEX_SIMD_66);
+ if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) {
+ tuple_type = EVEX_FV;
+ input_size_in_bits = EVEX_64bit;
+ emit_simd_arith_q(0x57, dst, src, VEX_SIMD_66);
+ } else {
+ emit_simd_arith(0x57, dst, src, VEX_SIMD_66, false, true);
+ }
}
void Assembler::xorps(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE);
-}
-
-void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FV;
+ input_size_in_bits = EVEX_32bit;
+ }
+ emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE, false,
+ (VM_Version::supports_avx512dq() == false));
+}
+
+void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) {
+ emit_vex_arith_q(0x57, dst, nds, src, VEX_SIMD_66, vector_len);
+ } else {
+ emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector_len, true);
+ }
+}
+
+void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256);
-}
-
-void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector_len,
+ (VM_Version::supports_avx512dq() == false));
+}
+
+void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+ if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) {
+ tuple_type = EVEX_FV;
+ input_size_in_bits = EVEX_64bit;
+ emit_vex_arith_q(0x57, dst, nds, src, VEX_SIMD_66, vector_len);
+ } else {
+ emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector_len, true);
+ }
+}
+
+void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
assert(VM_Version::supports_avx(), "");
- emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256);
-}
-
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FV;
+ input_size_in_bits = EVEX_32bit;
+ }
+ emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector_len,
+ (VM_Version::supports_avx512dq() == false));
+}
// Integer vector arithmetic
-void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
+void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(VM_Version::supports_avx() && (vector_len == 0) ||
+ VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len,
+ VEX_OPCODE_0F_38, true, false);
emit_int8(0x01);
emit_int8((unsigned char)(0xC0 | encode));
}
-void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
+void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(VM_Version::supports_avx() && (vector_len == 0) ||
+ VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len,
+ VEX_OPCODE_0F_38, true, false);
emit_int8(0x02);
emit_int8((unsigned char)(0xC0 | encode));
}
@@ -3390,61 +4150,89 @@
void Assembler::paddq(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith(0xD4, dst, src, VEX_SIMD_66);
+ if (VM_Version::supports_evex()) {
+ emit_simd_arith_q(0xD4, dst, src, VEX_SIMD_66);
+ } else {
+ emit_simd_arith(0xD4, dst, src, VEX_SIMD_66);
+ }
}
void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse3(), ""));
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false,
+ VEX_OPCODE_0F_38, false, AVX_128bit, true);
emit_int8(0x01);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse3(), ""));
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false,
+ VEX_OPCODE_0F_38, false, AVX_128bit, true);
emit_int8(0x02);
emit_int8((unsigned char)(0xC0 | encode));
}
-void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256);
+void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector_len,
+ (VM_Version::supports_avx512bw() == false));
+}
+
+void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector_len,
+ (VM_Version::supports_avx512bw() == false));
+}
+
+void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector_len);
+}
+
+void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ if (VM_Version::supports_evex()) {
+ emit_vex_arith_q(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
+ } else {
+ emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
+ }
+}
+
+void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FVM;
+ }
+ emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector_len);
+}
+
+void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FVM;
+ }
+ emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector_len);
+}
+
+void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FV;
+ input_size_in_bits = EVEX_32bit;
+ }
+ emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector_len);
+}
+
+void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FV;
+ input_size_in_bits = EVEX_64bit;
+ emit_vex_arith_q(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
+ } else {
+ emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
+ }
}
void Assembler::psubb(XMMRegister dst, XMMRegister src) {
@@ -3464,84 +4252,149 @@
void Assembler::psubq(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith(0xFB, dst, src, VEX_SIMD_66);
-}
-
-void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256);
+ if (VM_Version::supports_evex()) {
+ emit_simd_arith_q(0xFB, dst, src, VEX_SIMD_66);
+ } else {
+ emit_simd_arith(0xFB, dst, src, VEX_SIMD_66);
+ }
+}
+
+void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector_len,
+ (VM_Version::supports_avx512bw() == false));
+}
+
+void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector_len,
+ (VM_Version::supports_avx512bw() == false));
+}
+
+void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector_len);
+}
+
+void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ if (VM_Version::supports_evex()) {
+ emit_vex_arith_q(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
+ } else {
+ emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
+ }
+}
+
+void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FVM;
+ }
+ emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector_len,
+ (VM_Version::supports_avx512bw() == false));
+}
+
+void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FVM;
+ }
+ emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector_len,
+ (VM_Version::supports_avx512bw() == false));
+}
+
+void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FV;
+ input_size_in_bits = EVEX_32bit;
+ }
+ emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector_len);
+}
+
+void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FV;
+ input_size_in_bits = EVEX_64bit;
+ emit_vex_arith_q(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
+ } else {
+ emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
+ }
}
void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith(0xD5, dst, src, VEX_SIMD_66);
+ emit_simd_arith(0xD5, dst, src, VEX_SIMD_66,
+ (VM_Version::supports_avx512bw() == false));
}
void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_sse4_1(), "");
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66,
+ false, VEX_OPCODE_0F_38);
+ emit_int8(0x40);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector_len,
+ (VM_Version::supports_avx512bw() == false));
+}
+
+void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66,
+ vector_len, VEX_OPCODE_0F_38);
emit_int8(0x40);
emit_int8((unsigned char)(0xC0 | encode));
}
-void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
+void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(UseAVX > 2, "requires some form of AVX");
+ int src_enc = src->encoding();
+ int dst_enc = dst->encoding();
+ int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+ int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66,
+ VEX_OPCODE_0F_38, true, vector_len, false, false);
emit_int8(0x40);
emit_int8((unsigned char)(0xC0 | encode));
}
-void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FVM;
+ }
+ emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector_len);
+}
+
+void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FV;
+ input_size_in_bits = EVEX_32bit;
+ }
InstructionMark im(this);
int dst_enc = dst->encoding();
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
- vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256);
+ vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66,
+ VEX_OPCODE_0F_38, false, vector_len);
+ emit_int8(0x40);
+ emit_operand(dst, src);
+}
+
+void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FV;
+ input_size_in_bits = EVEX_64bit;
+ }
+ InstructionMark im(this);
+ int dst_enc = dst->encoding();
+ int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+ vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, true, vector_len);
emit_int8(0x40);
emit_operand(dst, src);
}
@@ -3550,7 +4403,8 @@
void Assembler::psllw(XMMRegister dst, int shift) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
// XMM6 is for /6 encoding: 66 0F 71 /6 ib
- int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
+ int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, false, VEX_OPCODE_0F,
+ false, AVX_128bit, (VM_Version::supports_avx512bw() == false));
emit_int8(0x71);
emit_int8((unsigned char)(0xC0 | encode));
emit_int8(shift & 0xFF);
@@ -3559,7 +4413,7 @@
void Assembler::pslld(XMMRegister dst, int shift) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
// XMM6 is for /6 encoding: 66 0F 72 /6 ib
- int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
+ int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, false);
emit_int8(0x72);
emit_int8((unsigned char)(0xC0 | encode));
emit_int8(shift & 0xFF);
@@ -3568,7 +4422,7 @@
void Assembler::psllq(XMMRegister dst, int shift) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
// XMM6 is for /6 encoding: 66 0F 73 /6 ib
- int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
+ int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, false, VEX_OPCODE_0F, true);
emit_int8(0x73);
emit_int8((unsigned char)(0xC0 | encode));
emit_int8(shift & 0xFF);
@@ -3576,7 +4430,8 @@
void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66);
+ emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66, false,
+ (VM_Version::supports_avx512bw() == false));
}
void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
@@ -3586,50 +4441,65 @@
void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66);
-}
-
-void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ if (VM_Version::supports_evex()) {
+ emit_simd_arith_q(0xF3, dst, shift, VEX_SIMD_66);
+ } else {
+ emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66);
+ }
+}
+
+void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
// XMM6 is for /6 encoding: 66 0F 71 /6 ib
- emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector256);
+ emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector_len,
+ (VM_Version::supports_avx512bw() == false));
emit_int8(shift & 0xFF);
}
-void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
// XMM6 is for /6 encoding: 66 0F 72 /6 ib
- emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector256);
+ emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector_len);
emit_int8(shift & 0xFF);
}
-void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
// XMM6 is for /6 encoding: 66 0F 73 /6 ib
- emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector256);
+ if (VM_Version::supports_evex()) {
+ emit_vex_arith_q(0x73, xmm6, dst, src, VEX_SIMD_66, vector_len);
+ } else {
+ emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector_len);
+ }
emit_int8(shift & 0xFF);
}
-void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector256);
+void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector_len,
+ (VM_Version::supports_avx512bw() == false));
+}
+
+void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector_len);
+}
+
+void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ if (VM_Version::supports_evex()) {
+ emit_vex_arith_q(0xF3, dst, src, shift, VEX_SIMD_66, vector_len);
+ } else {
+ emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector_len);
+ }
}
// Shift packed integers logically right by specified number of bits.
void Assembler::psrlw(XMMRegister dst, int shift) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
// XMM2 is for /2 encoding: 66 0F 71 /2 ib
- int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
+ int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, false, VEX_OPCODE_0F,
+ (VM_Version::supports_avx512bw() == false));
emit_int8(0x71);
emit_int8((unsigned char)(0xC0 | encode));
emit_int8(shift & 0xFF);
@@ -3638,7 +4508,7 @@
void Assembler::psrld(XMMRegister dst, int shift) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
// XMM2 is for /2 encoding: 66 0F 72 /2 ib
- int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
+ int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, false);
emit_int8(0x72);
emit_int8((unsigned char)(0xC0 | encode));
emit_int8(shift & 0xFF);
@@ -3649,7 +4519,12 @@
// shifts 128 bit value in xmm register by number of bytes.
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
// XMM2 is for /2 encoding: 66 0F 73 /2 ib
- int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
+ int encode = 0;
+ if (VM_Version::supports_evex() && VM_Version::supports_avx512bw()) {
+ encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, true, VEX_OPCODE_0F, false);
+ } else {
+ encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, false, VEX_OPCODE_0F, true);
+ }
emit_int8(0x73);
emit_int8((unsigned char)(0xC0 | encode));
emit_int8(shift & 0xFF);
@@ -3657,7 +4532,8 @@
void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66);
+ emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66, false,
+ (VM_Version::supports_avx512bw() == false));
}
void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
@@ -3667,50 +4543,65 @@
void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66);
-}
-
-void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+ if (VM_Version::supports_evex()) {
+ emit_simd_arith_q(0xD3, dst, shift, VEX_SIMD_66);
+ } else {
+ emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66);
+ }
+}
+
+void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
// XMM2 is for /2 encoding: 66 0F 73 /2 ib
- emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector256);
+ emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector_len,
+ (VM_Version::supports_avx512bw() == false));
emit_int8(shift & 0xFF);
}
-void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
// XMM2 is for /2 encoding: 66 0F 73 /2 ib
- emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector256);
+ emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector_len);
emit_int8(shift & 0xFF);
}
-void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
// XMM2 is for /2 encoding: 66 0F 73 /2 ib
- emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector256);
+ if (VM_Version::supports_evex()) {
+ emit_vex_arith_q(0x73, xmm2, dst, src, VEX_SIMD_66, vector_len);
+ } else {
+ emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector_len);
+ }
emit_int8(shift & 0xFF);
}
-void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector256);
+void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector_len,
+ (VM_Version::supports_avx512bw() == false));
+}
+
+void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector_len);
+}
+
+void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ if (VM_Version::supports_evex()) {
+ emit_vex_arith_q(0xD3, dst, src, shift, VEX_SIMD_66, vector_len);
+ } else {
+ emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector_len);
+ }
}
// Shift packed integers arithmetically right by specified number of bits.
void Assembler::psraw(XMMRegister dst, int shift) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
// XMM4 is for /4 encoding: 66 0F 71 /4 ib
- int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
+ int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, false, VEX_OPCODE_0F,
+ (VM_Version::supports_avx512bw() == false));
emit_int8(0x71);
emit_int8((unsigned char)(0xC0 | encode));
emit_int8(shift & 0xFF);
@@ -3719,7 +4610,7 @@
void Assembler::psrad(XMMRegister dst, int shift) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
// XMM4 is for /4 encoding: 66 0F 72 /4 ib
- int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
+ int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, false);
emit_int8(0x72);
emit_int8((unsigned char)(0xC0 | encode));
emit_int8(shift & 0xFF);
@@ -3727,7 +4618,8 @@
void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66);
+ emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66,
+ (VM_Version::supports_avx512bw() == false));
}
void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
@@ -3735,28 +4627,30 @@
emit_simd_arith(0xE2, dst, shift, VEX_SIMD_66);
}
-void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
// XMM4 is for /4 encoding: 66 0F 71 /4 ib
- emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector256);
+ emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector_len,
+ (VM_Version::supports_avx512bw() == false));
emit_int8(shift & 0xFF);
}
-void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
// XMM4 is for /4 encoding: 66 0F 71 /4 ib
- emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector256);
+ emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector_len);
emit_int8(shift & 0xFF);
}
-void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector256);
+void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector_len,
+ (VM_Version::supports_avx512bw() == false));
+}
+
+void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector_len);
}
@@ -3766,14 +4660,18 @@
emit_simd_arith(0xDB, dst, src, VEX_SIMD_66);
}
-void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256);
+void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len);
+}
+
+void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FV;
+ input_size_in_bits = EVEX_32bit;
+ }
+ emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len);
}
void Assembler::por(XMMRegister dst, XMMRegister src) {
@@ -3781,14 +4679,18 @@
emit_simd_arith(0xEB, dst, src, VEX_SIMD_66);
}
-void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256);
+void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector_len);
+}
+
+void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FV;
+ input_size_in_bits = EVEX_32bit;
+ }
+ emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector_len);
}
void Assembler::pxor(XMMRegister dst, XMMRegister src) {
@@ -3796,21 +4698,25 @@
emit_simd_arith(0xEF, dst, src, VEX_SIMD_66);
}
-void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
- assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
- emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256);
+void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector_len);
+}
+
+void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_FV;
+ input_size_in_bits = EVEX_32bit;
+ }
+ emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector_len);
}
void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
- bool vector256 = true;
- int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
+ int vector_len = AVX_256bit;
+ int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
emit_int8(0x18);
emit_int8((unsigned char)(0xC0 | encode));
// 0x00 - insert into lower 128 bits
@@ -3818,14 +4724,51 @@
emit_int8(0x01);
}
+void Assembler::vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+ assert(VM_Version::supports_evex(), "");
+ int vector_len = AVX_512bit;
+ int src_enc = src->encoding();
+ int dst_enc = dst->encoding();
+ int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+ int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66,
+ VEX_OPCODE_0F_3A, true, vector_len, false, false);
+ emit_int8(0x1A);
+ emit_int8((unsigned char)(0xC0 | encode));
+ // 0x00 - insert into lower 256 bits
+ // 0x01 - insert into upper 256 bits
+ emit_int8(0x01);
+}
+
+void Assembler::vinsertf64x4h(XMMRegister dst, Address src) {
+ assert(VM_Version::supports_avx(), "");
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T4;
+ input_size_in_bits = EVEX_64bit;
+ }
+ InstructionMark im(this);
+ int vector_len = AVX_512bit;
+ assert(dst != xnoreg, "sanity");
+ int dst_enc = dst->encoding();
+ // swap src<->dst for encoding
+ vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, true, vector_len);
+ emit_int8(0x1A);
+ emit_operand(dst, src);
+ // 0x01 - insert into upper 128 bits
+ emit_int8(0x01);
+}
+
void Assembler::vinsertf128h(XMMRegister dst, Address src) {
assert(VM_Version::supports_avx(), "");
- InstructionMark im(this);
- bool vector256 = true;
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T4;
+ input_size_in_bits = EVEX_32bit;
+ }
+ InstructionMark im(this);
+ int vector_len = AVX_256bit;
assert(dst != xnoreg, "sanity");
int dst_enc = dst->encoding();
// swap src<->dst for encoding
- vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
+ vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector_len);
emit_int8(0x18);
emit_operand(dst, src);
// 0x01 - insert into upper 128 bits
@@ -3834,8 +4777,8 @@
void Assembler::vextractf128h(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
- bool vector256 = true;
- int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
+ int vector_len = AVX_256bit;
+ int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
emit_int8(0x19);
emit_int8((unsigned char)(0xC0 | encode));
// 0x00 - insert into lower 128 bits
@@ -3845,11 +4788,15 @@
void Assembler::vextractf128h(Address dst, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
- InstructionMark im(this);
- bool vector256 = true;
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T4;
+ input_size_in_bits = EVEX_32bit;
+ }
+ InstructionMark im(this);
+ int vector_len = AVX_256bit;
assert(src != xnoreg, "sanity");
int src_enc = src->encoding();
- vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
+ vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector_len);
emit_int8(0x19);
emit_operand(src, dst);
// 0x01 - extract from upper 128 bits
@@ -3858,8 +4805,8 @@
void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx2(), "");
- bool vector256 = true;
- int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
+ int vector_len = AVX_256bit;
+ int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
emit_int8(0x38);
emit_int8((unsigned char)(0xC0 | encode));
// 0x00 - insert into lower 128 bits
@@ -3867,38 +4814,169 @@
emit_int8(0x01);
}
+void Assembler::vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+ assert(VM_Version::supports_evex(), "");
+ int vector_len = AVX_512bit;
+ int src_enc = src->encoding();
+ int dst_enc = dst->encoding();
+ int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+ int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
+ VM_Version::supports_avx512dq(), vector_len, false, false);
+ emit_int8(0x38);
+ emit_int8((unsigned char)(0xC0 | encode));
+ // 0x00 - insert into lower 256 bits
+ // 0x01 - insert into upper 256 bits
+ emit_int8(0x01);
+}
+
void Assembler::vinserti128h(XMMRegister dst, Address src) {
assert(VM_Version::supports_avx2(), "");
- InstructionMark im(this);
- bool vector256 = true;
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T4;
+ input_size_in_bits = EVEX_32bit;
+ }
+ InstructionMark im(this);
+ int vector_len = AVX_256bit;
assert(dst != xnoreg, "sanity");
int dst_enc = dst->encoding();
// swap src<->dst for encoding
- vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
+ vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector_len);
emit_int8(0x38);
emit_operand(dst, src);
// 0x01 - insert into upper 128 bits
emit_int8(0x01);
}
+void Assembler::vextracti128h(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_avx(), "");
+ int vector_len = AVX_256bit;
+ int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
+ emit_int8(0x39);
+ emit_int8((unsigned char)(0xC0 | encode));
+ // 0x00 - insert into lower 128 bits
+ // 0x01 - insert into upper 128 bits
+ emit_int8(0x01);
+}
+
void Assembler::vextracti128h(Address dst, XMMRegister src) {
assert(VM_Version::supports_avx2(), "");
- InstructionMark im(this);
- bool vector256 = true;
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T4;
+ input_size_in_bits = EVEX_32bit;
+ }
+ InstructionMark im(this);
+ int vector_len = AVX_256bit;
assert(src != xnoreg, "sanity");
int src_enc = src->encoding();
- vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
+ vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector_len);
emit_int8(0x39);
emit_operand(src, dst);
// 0x01 - extract from upper 128 bits
emit_int8(0x01);
}
+void Assembler::vextracti64x4h(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_evex(), "");
+ int vector_len = AVX_512bit;
+ int src_enc = src->encoding();
+ int dst_enc = dst->encoding();
+ int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
+ true, vector_len, false, false);
+ emit_int8(0x3B);
+ emit_int8((unsigned char)(0xC0 | encode));
+ // 0x01 - extract from upper 256 bits
+ emit_int8(0x01);
+}
+
+void Assembler::vextracti64x2h(XMMRegister dst, XMMRegister src, int value) {
+ assert(VM_Version::supports_evex(), "");
+ int vector_len = AVX_512bit;
+ int src_enc = src->encoding();
+ int dst_enc = dst->encoding();
+ int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
+ VM_Version::supports_avx512dq(), vector_len, false, false);
+ emit_int8(0x39);
+ emit_int8((unsigned char)(0xC0 | encode));
+ // 0x01 - extract from bits 255:128
+ // 0x02 - extract from bits 383:256
+ // 0x03 - extract from bits 511:384
+ emit_int8(value & 0x3);
+}
+
+void Assembler::vextractf64x4h(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_evex(), "");
+ int vector_len = AVX_512bit;
+ int src_enc = src->encoding();
+ int dst_enc = dst->encoding();
+ int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
+ VM_Version::supports_avx512dq(), vector_len, false, false);
+ emit_int8(0x1B);
+ emit_int8((unsigned char)(0xC0 | encode));
+ // 0x01 - extract from upper 256 bits
+ emit_int8(0x01);
+}
+
+void Assembler::vextractf64x4h(Address dst, XMMRegister src) {
+ assert(VM_Version::supports_avx2(), "");
+ tuple_type = EVEX_T4;
+ input_size_in_bits = EVEX_64bit;
+ InstructionMark im(this);
+ int vector_len = AVX_512bit;
+ assert(src != xnoreg, "sanity");
+ int src_enc = src->encoding();
+ vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
+ VM_Version::supports_avx512dq(), vector_len);
+ emit_int8(0x1B);
+ emit_operand(src, dst);
+ // 0x01 - extract from upper 128 bits
+ emit_int8(0x01);
+}
+
+void Assembler::vextractf32x4h(XMMRegister dst, XMMRegister src, int value) {
+ assert(VM_Version::supports_evex(), "");
+ int vector_len = AVX_512bit;
+ int src_enc = src->encoding();
+ int dst_enc = dst->encoding();
+ int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66,
+ VEX_OPCODE_0F_3A, false, vector_len, false, false);
+ emit_int8(0x19);
+ emit_int8((unsigned char)(0xC0 | encode));
+ // 0x01 - extract from bits 255:128
+ // 0x02 - extract from bits 383:256
+ // 0x03 - extract from bits 511:384
+ emit_int8(value & 0x3);
+}
+
+void Assembler::vextractf64x2h(XMMRegister dst, XMMRegister src, int value) {
+ assert(VM_Version::supports_evex(), "");
+ int vector_len = AVX_512bit;
+ int src_enc = src->encoding();
+ int dst_enc = dst->encoding();
+ int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
+ VM_Version::supports_avx512dq(), vector_len, false, false);
+ emit_int8(0x19);
+ emit_int8((unsigned char)(0xC0 | encode));
+ // 0x01 - extract from bits 255:128
+ // 0x02 - extract from bits 383:256
+ // 0x03 - extract from bits 511:384
+ emit_int8(value & 0x3);
+}
+
// duplicate 4-bytes integer data from src into 8 locations in dest
void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_avx2(), "");
- bool vector256 = true;
- int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
+ int vector_len = AVX_256bit;
+ int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66,
+ vector_len, VEX_OPCODE_0F_38, false);
+ emit_int8(0x58);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// duplicate 4-bytes integer data from src into 8 locations in dest
+void Assembler::evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66,
+ vector_len, VEX_OPCODE_0F_38, false);
emit_int8(0x58);
emit_int8((unsigned char)(0xC0 | encode));
}
@@ -3906,7 +4984,8 @@
// Carry-Less Multiplication Quadword
void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
assert(VM_Version::supports_clmul(), "");
- int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false,
+ VEX_OPCODE_0F_3A, false, AVX_128bit, true);
emit_int8(0x44);
emit_int8((unsigned char)(0xC0 | encode));
emit_int8((unsigned char)mask);
@@ -3915,8 +4994,9 @@
// Carry-Less Multiplication Quadword
void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
- bool vector256 = false;
- int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
+ int vector_len = AVX_128bit;
+ int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66,
+ vector_len, VEX_OPCODE_0F_3A, true);
emit_int8(0x44);
emit_int8((unsigned char)(0xC0 | encode));
emit_int8((unsigned char)mask);
@@ -3924,8 +5004,11 @@
void Assembler::vzeroupper() {
assert(VM_Version::supports_avx(), "");
- (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
- emit_int8(0x77);
+ if (UseAVX < 3)
+ {
+ (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
+ emit_int8(0x77);
+ }
}
@@ -4442,7 +5525,7 @@
}
-void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool vector256) {
+void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, int vector_len) {
if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
prefix(VEX_3bytes);
@@ -4452,7 +5535,7 @@
emit_int8(byte1);
int byte2 = ((~nds_enc) & 0xf) << 3;
- byte2 |= (vex_w ? VEX_W : 0) | (vector256 ? 4 : 0) | pre;
+ byte2 |= (vex_w ? VEX_W : 0) | ((vector_len > 0) ? 4 : 0) | pre;
emit_int8(byte2);
} else {
prefix(VEX_2bytes);
@@ -4460,89 +5543,237 @@
int byte1 = vex_r ? VEX_R : 0;
byte1 = (~byte1) & 0x80;
byte1 |= ((~nds_enc) & 0xf) << 3;
- byte1 |= (vector256 ? 4 : 0) | pre;
+ byte1 |= ((vector_len > 0 ) ? 4 : 0) | pre;
emit_int8(byte1);
}
}
-void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256){
+// This is a 4 byte encoding
+void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, bool evex_r, bool evex_v,
+ int nds_enc, VexSimdPrefix pre, VexOpcode opc,
+ bool is_extended_context, bool is_merge_context,
+ int vector_len, bool no_mask_reg ){
+ // EVEX 0x62 prefix
+ prefix(EVEX_4bytes);
+ evex_encoding = (vex_w ? VEX_W : 0) | (evex_r ? EVEX_Rb : 0);
+
+ // P0: byte 2, initialized to RXBR`00mm
+ // instead of not'd
+ int byte2 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0) | (evex_r ? EVEX_Rb : 0);
+ byte2 = (~byte2) & 0xF0;
+ // confine opc opcode extensions in mm bits to lower two bits
+ // of form {0F, 0F_38, 0F_3A}
+ byte2 |= opc;
+ emit_int8(byte2);
+
+ // P1: byte 3 as Wvvvv1pp
+ int byte3 = ((~nds_enc) & 0xf) << 3;
+ // p[10] is always 1
+ byte3 |= EVEX_F;
+ byte3 |= (vex_w & 1) << 7;
+ // confine pre opcode extensions in pp bits to lower two bits
+ // of form {66, F3, F2}
+ byte3 |= pre;
+ emit_int8(byte3);
+
+ // P2: byte 4 as zL'Lbv'aaa
+ int byte4 = (no_mask_reg) ? 0 : 1; // kregs are implemented in the low 3 bits as aaa (hard code k1, it will be initialized for now)
+ // EVEX.v` for extending EVEX.vvvv or VIDX
+ byte4 |= (evex_v ? 0: EVEX_V);
+ // third EXEC.b for broadcast actions
+ byte4 |= (is_extended_context ? EVEX_Rb : 0);
+ // fourth EVEX.L'L for vector length : 0 is 128, 1 is 256, 2 is 512, currently we do not support 1024
+ byte4 |= ((vector_len) & 0x3) << 5;
+ // last is EVEX.z for zero/merge actions
+ byte4 |= (is_merge_context ? EVEX_Z : 0);
+ emit_int8(byte4);
+}
+
+void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre,
+ VexOpcode opc, bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg) {
bool vex_r = (xreg_enc >= 8);
bool vex_b = adr.base_needs_rex();
bool vex_x = adr.index_needs_rex();
- vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);
-}
-
-int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256) {
+ avx_vector_len = vector_len;
+
+ // if vector length is turned off, revert to AVX for vectors smaller than AVX_512bit
+ if (VM_Version::supports_avx512vl() == false) {
+ switch (vector_len) {
+ case AVX_128bit:
+ case AVX_256bit:
+ legacy_mode = true;
+ break;
+ }
+ }
+
+ if ((UseAVX > 2) && (legacy_mode == false))
+ {
+ bool evex_r = (xreg_enc >= 16);
+ bool evex_v = (nds_enc >= 16);
+ is_evex_instruction = true;
+ evex_prefix(vex_r, vex_b, vex_x, vex_w, evex_r, evex_v, nds_enc, pre, opc, false, false, vector_len, no_mask_reg);
+ } else {
+ vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector_len);
+ }
+}
+
+int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
+ bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg ) {
bool vex_r = (dst_enc >= 8);
bool vex_b = (src_enc >= 8);
bool vex_x = false;
- vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);
+ avx_vector_len = vector_len;
+
+ // if vector length is turned off, revert to AVX for vectors smaller than AVX_512bit
+ if (VM_Version::supports_avx512vl() == false) {
+ switch (vector_len) {
+ case AVX_128bit:
+ case AVX_256bit:
+ legacy_mode = true;
+ break;
+ }
+ }
+
+ if ((UseAVX > 2) && (legacy_mode == false))
+ {
+ bool evex_r = (dst_enc >= 16);
+ bool evex_v = (nds_enc >= 16);
+ // can use vex_x as bank extender on rm encoding
+ vex_x = (src_enc >= 16);
+ evex_prefix(vex_r, vex_b, vex_x, vex_w, evex_r, evex_v, nds_enc, pre, opc, false, false, vector_len, no_mask_reg);
+ } else {
+ vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector_len);
+ }
+
+ // return modrm byte components for operands
return (((dst_enc & 7) << 3) | (src_enc & 7));
}
-void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {
+void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
+ bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len, bool legacy_mode) {
if (UseAVX > 0) {
int xreg_enc = xreg->encoding();
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
- vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector256);
+ vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector_len, legacy_mode, no_mask_reg);
} else {
assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
rex_prefix(adr, xreg, pre, opc, rex_w);
}
}
-int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {
+int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
+ bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len, bool legacy_mode) {
int dst_enc = dst->encoding();
int src_enc = src->encoding();
if (UseAVX > 0) {
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
- return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector256);
+ return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, legacy_mode, no_mask_reg);
} else {
assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w);
}
}
-void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
- InstructionMark im(this);
- simd_prefix(dst, dst, src, pre);
+int Assembler::kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src, VexSimdPrefix pre,
+ bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len) {
+ int dst_enc = dst->encoding();
+ int src_enc = src->encoding();
+ int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+ return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, true, no_mask_reg);
+}
+
+int Assembler::kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src, VexSimdPrefix pre,
+ bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len) {
+ int dst_enc = dst->encoding();
+ int src_enc = src->encoding();
+ int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+ return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, true, no_mask_reg);
+}
+
+void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) {
+ InstructionMark im(this);
+ simd_prefix(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode);
emit_int8(opcode);
emit_operand(dst, src);
}
-void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
- int encode = simd_prefix_and_encode(dst, dst, src, pre);
+void Assembler::emit_simd_arith_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg) {
+ InstructionMark im(this);
+ simd_prefix_q(dst, dst, src, pre, no_mask_reg);
+ emit_int8(opcode);
+ emit_operand(dst, src);
+}
+
+void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) {
+ int encode = simd_prefix_and_encode(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode);
+ emit_int8(opcode);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::emit_simd_arith_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
+ int encode = simd_prefix_and_encode(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, true, AVX_128bit);
emit_int8(opcode);
emit_int8((unsigned char)(0xC0 | encode));
}
// Versions with no second source register (non-destructive source).
-void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
- InstructionMark im(this);
- simd_prefix(dst, xnoreg, src, pre);
+void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool opNoRegMask) {
+ InstructionMark im(this);
+ simd_prefix(dst, xnoreg, src, pre, opNoRegMask);
emit_int8(opcode);
emit_operand(dst, src);
}
-void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
- int encode = simd_prefix_and_encode(dst, xnoreg, src, pre);
+void Assembler::emit_simd_arith_nonds_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool opNoRegMask) {
+ InstructionMark im(this);
+ simd_prefix_q(dst, xnoreg, src, pre, opNoRegMask);
+ emit_int8(opcode);
+ emit_operand(dst, src);
+}
+
+void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) {
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg, VEX_OPCODE_0F, legacy_mode, AVX_128bit);
+ emit_int8(opcode);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::emit_simd_arith_nonds_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg, VEX_OPCODE_0F, true, AVX_128bit);
emit_int8(opcode);
emit_int8((unsigned char)(0xC0 | encode));
}
// 3-operands AVX instructions
-void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
- Address src, VexSimdPrefix pre, bool vector256) {
- InstructionMark im(this);
- vex_prefix(dst, nds, src, pre, vector256);
+void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, Address src,
+ VexSimdPrefix pre, int vector_len, bool no_mask_reg, bool legacy_mode) {
+ InstructionMark im(this);
+ vex_prefix(dst, nds, src, pre, vector_len, no_mask_reg, legacy_mode);
+ emit_int8(opcode);
+ emit_operand(dst, src);
+}
+
+void Assembler::emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds,
+ Address src, VexSimdPrefix pre, int vector_len, bool no_mask_reg) {
+ InstructionMark im(this);
+ vex_prefix_q(dst, nds, src, pre, vector_len, no_mask_reg);
emit_int8(opcode);
emit_operand(dst, src);
}
-void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
- XMMRegister src, VexSimdPrefix pre, bool vector256) {
- int encode = vex_prefix_and_encode(dst, nds, src, pre, vector256);
+void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src,
+ VexSimdPrefix pre, int vector_len, bool no_mask_reg, bool legacy_mode) {
+ int encode = vex_prefix_and_encode(dst, nds, src, pre, vector_len, VEX_OPCODE_0F, false, no_mask_reg);
+ emit_int8(opcode);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src,
+ VexSimdPrefix pre, int vector_len, bool no_mask_reg) {
+ int src_enc = src->encoding();
+ int dst_enc = dst->encoding();
+ int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+ int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, VEX_OPCODE_0F, true, vector_len, false, no_mask_reg);
emit_int8(opcode);
emit_int8((unsigned char)(0xC0 | encode));
}
@@ -5040,6 +6271,10 @@
}
void Assembler::andnq(Register dst, Register src1, Address src2) {
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_64bit;
+ }
InstructionMark im(this);
assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
vex_prefix_0F38_q(dst, src1, src2);
@@ -5181,44 +6416,52 @@
void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2);
+ int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2, true);
emit_int8(0x2A);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionMark im(this);
- simd_prefix_q(dst, dst, src, VEX_SIMD_F2);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_32bit;
+ }
+ InstructionMark im(this);
+ simd_prefix_q(dst, dst, src, VEX_SIMD_F2, true);
emit_int8(0x2A);
emit_operand(dst, src);
}
void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3);
+ int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3, true);
emit_int8(0x2A);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- InstructionMark im(this);
- simd_prefix_q(dst, dst, src, VEX_SIMD_F3);
+ if (VM_Version::supports_evex()) {
+ tuple_type = EVEX_T1S;
+ input_size_in_bits = EVEX_32bit;
+ }
+ InstructionMark im(this);
+ simd_prefix_q(dst, dst, src, VEX_SIMD_F3, true);
emit_int8(0x2A);
emit_operand(dst, src);
}
void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2);
+ int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, true);
emit_int8(0x2C);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::cvttss2siq(Register dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
- int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3);
+ int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, true);
emit_int8(0x2C);
emit_int8((unsigned char)(0xC0 | encode));
}
@@ -5387,7 +6630,7 @@
void Assembler::movdq(XMMRegister dst, Register src) {
// table D-1 says MMX/SSE2
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66);
+ int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66, true);
emit_int8(0x6E);
emit_int8((unsigned char)(0xC0 | encode));
}
@@ -5396,7 +6639,7 @@
// table D-1 says MMX/SSE2
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
// swap src/dst to get correct prefix
- int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66);
+ int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66, true);
emit_int8(0x7E);
emit_int8((unsigned char)(0xC0 | encode));
}
@@ -5529,7 +6772,8 @@
void Assembler::mulxq(Register dst1, Register dst2, Register src) {
assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
- int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, true, false);
+ int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(),
+ VEX_SIMD_F2, VEX_OPCODE_0F_38, true, AVX_128bit, true, false);
emit_int8((unsigned char)0xF6);
emit_int8((unsigned char)(0xC0 | encode));
}
@@ -5678,7 +6922,8 @@
void Assembler::rorxq(Register dst, Register src, int imm8) {
assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
- int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, true, false);
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2,
+ VEX_OPCODE_0F_3A, true, AVX_128bit, true, false);
emit_int8((unsigned char)0xF0);
emit_int8((unsigned char)(0xC0 | encode));
emit_int8(imm8);
--- a/hotspot/src/cpu/x86/vm/x86.ad Wed May 13 21:06:30 2015 +0200
+++ b/hotspot/src/cpu/x86/vm/x86.ad Thu May 14 16:16:06 2015 +0200
@@ -59,15 +59,19 @@
//
// The encoding number is the actual bit-pattern placed into the opcodes.
-// XMM registers. 256-bit registers or 8 words each, labeled (a)-h.
+// XMM registers. 512-bit registers or 8 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
-// XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
+// For pre EVEX enabled architectures:
+// XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
+// For EVEX enabled architectures:
+// XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
+//
// Linux ABI: No register preserved across function calls
// XMM0-XMM7 might hold parameters
-// Windows ABI: XMM6-XMM15 preserved across function calls
+// Windows ABI: XMM6-XMM31 preserved across function calls
// XMM0-XMM3 might hold parameters
reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
@@ -78,6 +82,14 @@
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
+reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
+reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
+reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
+reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
+reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
+reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
+reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
+reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));
reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
@@ -87,6 +99,14 @@
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
+reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
+reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
+reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
+reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
+reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
+reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
+reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
+reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));
reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
@@ -96,6 +116,14 @@
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
+reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
+reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
+reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
+reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
+reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
+reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
+reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
+reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
@@ -105,6 +133,14 @@
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
+reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
+reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
+reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
+reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
+reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
+reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
+reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
+reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));
reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
@@ -114,6 +150,14 @@
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
+reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
+reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
+reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
+reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
+reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
+reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
+reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
+reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));
reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
@@ -123,6 +167,14 @@
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
+reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
+reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
+reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
+reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
+reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
+reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
+reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
+reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
#ifdef _WIN64
@@ -134,6 +186,14 @@
reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
+reg_def XMM6i( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(8));
+reg_def XMM6j( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(9));
+reg_def XMM6k( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(10));
+reg_def XMM6l( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(11));
+reg_def XMM6m( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(12));
+reg_def XMM6n( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(13));
+reg_def XMM6o( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(14));
+reg_def XMM6p( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(15));
reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
@@ -143,6 +203,14 @@
reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
+reg_def XMM7i( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(8));
+reg_def XMM7j( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(9));
+reg_def XMM7k( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(10));
+reg_def XMM7l( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(11));
+reg_def XMM7m( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(12));
+reg_def XMM7n( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(13));
+reg_def XMM7o( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(14));
+reg_def XMM7p( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(15));
reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
@@ -152,6 +220,14 @@
reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
+reg_def XMM8i( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(8));
+reg_def XMM8j( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(9));
+reg_def XMM8k( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(10));
+reg_def XMM8l( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(11));
+reg_def XMM8m( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(12));
+reg_def XMM8n( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(13));
+reg_def XMM8o( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(14));
+reg_def XMM8p( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(15));
reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
@@ -161,6 +237,14 @@
reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
+reg_def XMM9i( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(8));
+reg_def XMM9j( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(9));
+reg_def XMM9k( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(10));
+reg_def XMM9l( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(11));
+reg_def XMM9m( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(12));
+reg_def XMM9n( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(13));
+reg_def XMM9o( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(14));
+reg_def XMM9p( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(15));
reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
@@ -170,6 +254,14 @@
reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
+reg_def XMM10i( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(8));
+reg_def XMM10j( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(9));
+reg_def XMM10k( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(10));
+reg_def XMM10l( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(11));
+reg_def XMM10m( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(12));
+reg_def XMM10n( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(13));
+reg_def XMM10o( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(14));
+reg_def XMM10p( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(15));
reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
@@ -179,6 +271,14 @@
reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
+reg_def XMM11i( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(8));
+reg_def XMM11j( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(9));
+reg_def XMM11k( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(10));
+reg_def XMM11l( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(11));
+reg_def XMM11m( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(12));
+reg_def XMM11n( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(13));
+reg_def XMM11o( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(14));
+reg_def XMM11p( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(15));
reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
@@ -188,6 +288,14 @@
reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
+reg_def XMM12i( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(8));
+reg_def XMM12j( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(9));
+reg_def XMM12k( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(10));
+reg_def XMM12l( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(11));
+reg_def XMM12m( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(12));
+reg_def XMM12n( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(13));
+reg_def XMM12o( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(14));
+reg_def XMM12p( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(15));
reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
@@ -197,6 +305,14 @@
reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
+reg_def XMM13i( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(8));
+reg_def XMM13j( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(9));
+reg_def XMM13k( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(10));
+reg_def XMM13l( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(11));
+reg_def XMM13m( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(12));
+reg_def XMM13n( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(13));
+reg_def XMM13o( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(14));
+reg_def XMM13p( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(15));
reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
@@ -206,6 +322,14 @@
reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
+reg_def XMM14i( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(8));
+reg_def XMM14j( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(9));
+reg_def XMM14k( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(10));
+reg_def XMM14l( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(11));
+reg_def XMM14m( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(12));
+reg_def XMM14n( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(13));
+reg_def XMM14o( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(14));
+reg_def XMM14p( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(15));
reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
@@ -215,6 +339,285 @@
reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
+reg_def XMM15i( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(8));
+reg_def XMM15j( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(9));
+reg_def XMM15k( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(10));
+reg_def XMM15l( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(11));
+reg_def XMM15m( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(12));
+reg_def XMM15n( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(13));
+reg_def XMM15o( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(14));
+reg_def XMM15p( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(15));
+
+reg_def XMM16 ( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg());
+reg_def XMM16b( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(1));
+reg_def XMM16c( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(2));
+reg_def XMM16d( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(3));
+reg_def XMM16e( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(4));
+reg_def XMM16f( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(5));
+reg_def XMM16g( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(6));
+reg_def XMM16h( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(7));
+reg_def XMM16i( SOC, SOE, Op_RegF, 16, xmm15->as_VMReg()->next(8));
+reg_def XMM16j( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(9));
+reg_def XMM16k( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(10));
+reg_def XMM16l( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(11));
+reg_def XMM16m( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(12));
+reg_def XMM16n( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(13));
+reg_def XMM16o( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(14));
+reg_def XMM16p( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(15));
+
+reg_def XMM17 ( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg());
+reg_def XMM17b( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(1));
+reg_def XMM17c( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(2));
+reg_def XMM17d( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(3));
+reg_def XMM17e( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(4));
+reg_def XMM17f( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(5));
+reg_def XMM17g( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(6));
+reg_def XMM17h( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(7));
+reg_def XMM17i( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(8));
+reg_def XMM17j( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(9));
+reg_def XMM17k( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(10));
+reg_def XMM17l( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(11));
+reg_def XMM17m( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(12));
+reg_def XMM17n( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(13));
+reg_def XMM17o( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(14));
+reg_def XMM17p( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(15));
+
+reg_def XMM18 ( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg());
+reg_def XMM18b( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(1));
+reg_def XMM18c( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(2));
+reg_def XMM18d( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(3));
+reg_def XMM18e( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(4));
+reg_def XMM18f( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(5));
+reg_def XMM18g( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(6));
+reg_def XMM18h( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(7));
+reg_def XMM18i( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(8));
+reg_def XMM18j( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(9));
+reg_def XMM18k( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(10));
+reg_def XMM18l( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(11));
+reg_def XMM18m( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(12));
+reg_def XMM18n( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(13));
+reg_def XMM18o( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(14));
+reg_def XMM18p( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(15));
+
+reg_def XMM19 ( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg());
+reg_def XMM19b( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(1));
+reg_def XMM19c( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(2));
+reg_def XMM19d( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(3));
+reg_def XMM19e( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(4));
+reg_def XMM19f( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(5));
+reg_def XMM19g( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(6));
+reg_def XMM19h( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(7));
+reg_def XMM19i( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(8));
+reg_def XMM19j( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(9));
+reg_def XMM19k( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(10));
+reg_def XMM19l( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(11));
+reg_def XMM19m( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(12));
+reg_def XMM19n( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(13));
+reg_def XMM19o( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(14));
+reg_def XMM19p( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(15));
+
+reg_def XMM20 ( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg());
+reg_def XMM20b( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(1));
+reg_def XMM20c( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(2));
+reg_def XMM20d( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(3));
+reg_def XMM20e( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(4));
+reg_def XMM20f( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(5));
+reg_def XMM20g( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(6));
+reg_def XMM20h( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(7));
+reg_def XMM20i( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(8));
+reg_def XMM20j( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(9));
+reg_def XMM20k( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(10));
+reg_def XMM20l( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(11));
+reg_def XMM20m( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(12));
+reg_def XMM20n( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(13));
+reg_def XMM20o( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(14));
+reg_def XMM20p( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(15));
+
+reg_def XMM21 ( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg());
+reg_def XMM21b( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(1));
+reg_def XMM21c( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(2));
+reg_def XMM21d( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(3));
+reg_def XMM21e( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(4));
+reg_def XMM21f( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(5));
+reg_def XMM21g( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(6));
+reg_def XMM21h( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(7));
+reg_def XMM21i( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(8));
+reg_def XMM21j( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(9));
+reg_def XMM21k( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(10));
+reg_def XMM21l( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(11));
+reg_def XMM21m( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(12));
+reg_def XMM21n( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(13));
+reg_def XMM21o( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(14));
+reg_def XMM21p( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(15));
+
+reg_def XMM22 ( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg());
+reg_def XMM22b( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(1));
+reg_def XMM22c( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(2));
+reg_def XMM22d( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(3));
+reg_def XMM22e( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(4));
+reg_def XMM22f( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(5));
+reg_def XMM22g( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(6));
+reg_def XMM22h( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(7));
+reg_def XMM22i( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(8));
+reg_def XMM22j( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(9));
+reg_def XMM22k( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(10));
+reg_def XMM22l( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(11));
+reg_def XMM22m( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(12));
+reg_def XMM22n( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(13));
+reg_def XMM22o( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(14));
+reg_def XMM22p( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(15));
+
+reg_def XMM23 ( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg());
+reg_def XMM23b( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(1));
+reg_def XMM23c( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(2));
+reg_def XMM23d( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(3));
+reg_def XMM23e( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(4));
+reg_def XMM23f( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(5));
+reg_def XMM23g( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(6));
+reg_def XMM23h( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(7));
+reg_def XMM23i( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(8));
+reg_def XMM23j( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(9));
+reg_def XMM23k( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(10));
+reg_def XMM23l( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(11));
+reg_def XMM23m( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(12));
+reg_def XMM23n( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(13));
+reg_def XMM23o( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(14));
+reg_def XMM23p( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(15));
+
+reg_def XMM24 ( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg());
+reg_def XMM24b( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(1));
+reg_def XMM24c( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(2));
+reg_def XMM24d( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(3));
+reg_def XMM24e( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(4));
+reg_def XMM24f( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(5));
+reg_def XMM24g( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(6));
+reg_def XMM24h( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(7));
+reg_def XMM24i( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(8));
+reg_def XMM24j( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(9));
+reg_def XMM24k( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(10));
+reg_def XMM24l( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(11));
+reg_def XMM24m( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(12));
+reg_def XMM24n( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(13));
+reg_def XMM24o( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(14));
+reg_def XMM24p( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(15));
+
+reg_def XMM25 ( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg());
+reg_def XMM25b( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(1));
+reg_def XMM25c( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(2));
+reg_def XMM25d( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(3));
+reg_def XMM25e( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(4));
+reg_def XMM25f( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(5));
+reg_def XMM25g( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(6));
+reg_def XMM25h( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(7));
+reg_def XMM25i( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(8));
+reg_def XMM25j( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(9));
+reg_def XMM25k( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(10));
+reg_def XMM25l( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(11));
+reg_def XMM25m( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(12));
+reg_def XMM25n( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(13));
+reg_def XMM25o( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(14));
+reg_def XMM25p( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(15));
+
+reg_def XMM26 ( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg());
+reg_def XMM26b( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(1));
+reg_def XMM26c( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(2));
+reg_def XMM26d( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(3));
+reg_def XMM26e( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(4));
+reg_def XMM26f( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(5));
+reg_def XMM26g( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(6));
+reg_def XMM26h( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(7));
+reg_def XMM26i( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(8));
+reg_def XMM26j( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(9));
+reg_def XMM26k( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(10));
+reg_def XMM26l( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(11));
+reg_def XMM26m( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(12));
+reg_def XMM26n( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(13));
+reg_def XMM26o( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(14));
+reg_def XMM26p( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(15));
+
+reg_def XMM27g( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(1));
+reg_def XMM27c( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(2));
+reg_def XMM27d( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(3));
+reg_def XMM27e( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(4));
+reg_def XMM27f( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(5));
+reg_def XMM27g( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(6));
+reg_def XMM27h( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(7));
+reg_def XMM27i( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(8));
+reg_def XMM27j( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(9));
+reg_def XMM27k( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(10));
+reg_def XMM27l( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(11));
+reg_def XMM27m( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(12));
+reg_def XMM27n( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(13));
+reg_def XMM27o( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(14));
+reg_def XMM27p( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(15));
+
+reg_def XMM28 ( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg());
+reg_def XMM28b( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(1));
+reg_def XMM28c( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(2));
+reg_def XMM28d( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(3));
+reg_def XMM28e( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(4));
+reg_def XMM28f( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(5));
+reg_def XMM28g( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(6));
+reg_def XMM28h( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(7));
+reg_def XMM28i( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(8));
+reg_def XMM28j( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(9));
+reg_def XMM28k( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(10));
+reg_def XMM28l( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(11));
+reg_def XMM28m( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(12));
+reg_def XMM28n( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(13));
+reg_def XMM28o( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(14));
+reg_def XMM28p( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(15));
+
+reg_def XMM29 ( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg());
+reg_def XMM29b( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(1));
+reg_def XMM29c( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(2));
+reg_def XMM29d( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(3));
+reg_def XMM29e( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(4));
+reg_def XMM29f( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(5));
+reg_def XMM29g( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(6));
+reg_def XMM29h( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(7));
+reg_def XMM29i( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(8));
+reg_def XMM29j( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(9));
+reg_def XMM29k( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(10));
+reg_def XMM29l( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(11));
+reg_def XMM29m( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(12));
+reg_def XMM29n( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(13));
+reg_def XMM29o( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(14));
+reg_def XMM29p( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(15));
+
+reg_def XMM30 ( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg());
+reg_def XMM30b( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(1));
+reg_def XMM30c( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(2));
+reg_def XMM30d( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(3));
+reg_def XMM30e( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(4));
+reg_def XMM30f( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(5));
+reg_def XMM30g( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(6));
+reg_def XMM30h( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(7));
+reg_def XMM30i( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(8));
+reg_def XMM30j( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(9));
+reg_def XMM30k( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(10));
+reg_def XMM30l( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(11));
+reg_def XMM30m( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(12));
+reg_def XMM30n( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(13));
+reg_def XMM30o( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(14));
+reg_def XMM30p( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(15));
+
+reg_def XMM31 ( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg());
+reg_def XMM31b( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(1));
+reg_def XMM31c( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(2));
+reg_def XMM31d( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(3));
+reg_def XMM31e( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(4));
+reg_def XMM31f( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(5));
+reg_def XMM31g( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(6));
+reg_def XMM31h( SOC, SOE, Op_RegF, 31, xmm31>-as_VMReg()->next(7));
+reg_def XMM31i( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(8));
+reg_def XMM31j( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(9));
+reg_def XMM31k( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(10));
+reg_def XMM31l( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(11));
+reg_def XMM31m( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(12));
+reg_def XMM31n( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(13));
+reg_def XMM31o( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(14));
+reg_def XMM31p( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(15));
#else // _WIN64
@@ -226,6 +629,14 @@
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
+reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
+reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
+reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
+reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
+reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
+reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
+reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
+reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));
reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
@@ -235,6 +646,14 @@
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
+reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
+reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
+reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
+reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
+reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
+reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
+reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
+reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));
#ifdef _LP64
@@ -246,6 +665,14 @@
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
+reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
+reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
+reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
+reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
+reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
+reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
+reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
+reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));
reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
@@ -255,6 +682,14 @@
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
+reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
+reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
+reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
+reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
+reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
+reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
+reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
+reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));
reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
@@ -264,6 +699,14 @@
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
+reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
+reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
+reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
+reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
+reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
+reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
+reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
+reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));
reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
@@ -273,6 +716,14 @@
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
+reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
+reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
+reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
+reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
+reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
+reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
+reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
+reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));
reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
@@ -282,6 +733,14 @@
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
+reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
+reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
+reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
+reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
+reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
+reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
+reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
+reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));
reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
@@ -291,6 +750,14 @@
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
+reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
+reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
+reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
+reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
+reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
+reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
+reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
+reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));
reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
@@ -300,6 +767,14 @@
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
+reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
+reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
+reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
+reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
+reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
+reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
+reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
+reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));
reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
@@ -309,6 +784,286 @@
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
+reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
+reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
+reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
+reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
+reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
+reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
+reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
+reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));
+
+reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
+reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
+reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
+reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
+reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
+reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
+reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
+reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
+reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
+reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
+reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
+reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
+reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
+reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
+reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
+reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));
+
+reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
+reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
+reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
+reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
+reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
+reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
+reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
+reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
+reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
+reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
+reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
+reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
+reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
+reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
+reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
+reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));
+
+reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
+reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
+reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
+reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
+reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
+reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
+reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
+reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
+reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
+reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
+reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
+reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
+reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
+reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
+reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
+reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));
+
+reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
+reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
+reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
+reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
+reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
+reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
+reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
+reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
+reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
+reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
+reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
+reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
+reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
+reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
+reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
+reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));
+
+reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
+reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
+reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
+reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
+reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
+reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
+reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
+reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
+reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
+reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
+reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
+reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
+reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
+reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
+reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
+reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
+
+reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
+reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
+reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
+reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
+reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
+reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
+reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
+reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
+reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
+reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
+reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
+reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
+reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
+reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
+reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
+reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
+
+reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
+reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
+reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
+reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
+reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
+reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
+reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
+reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
+reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
+reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
+reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
+reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
+reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
+reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
+reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
+reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
+
+reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
+reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
+reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
+reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
+reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
+reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
+reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
+reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
+reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
+reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
+reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
+reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
+reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
+reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
+reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
+reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
+
+reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
+reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
+reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
+reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
+reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
+reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
+reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
+reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
+reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
+reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
+reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
+reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
+reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
+reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
+reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
+reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
+
+reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
+reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
+reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
+reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
+reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
+reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
+reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
+reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
+reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
+reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
+reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
+reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
+reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
+reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
+reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
+reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
+
+reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
+reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
+reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
+reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
+reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
+reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
+reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
+reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
+reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
+reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
+reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
+reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
+reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
+reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
+reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
+reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
+
+reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
+reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
+reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
+reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
+reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
+reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
+reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
+reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
+reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
+reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
+reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
+reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
+reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
+reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
+reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
+reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
+
+reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
+reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
+reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
+reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
+reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
+reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
+reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
+reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
+reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
+reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
+reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
+reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
+reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
+reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
+reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
+reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
+
+reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
+reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
+reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
+reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
+reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
+reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
+reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
+reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
+reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
+reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
+reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
+reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
+reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
+reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
+reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
+reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
+
+reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
+reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
+reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
+reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
+reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
+reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
+reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
+reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
+reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
+reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
+reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
+reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
+reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
+reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
+reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
+reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
+
+reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
+reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
+reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
+reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
+reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
+reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
+reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
+reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
+reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
+reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
+reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
+reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
+reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
+reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
+reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
+reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
#endif // _LP64
@@ -320,25 +1075,41 @@
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64
-alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
- XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
- XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
- XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
- XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
- XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
- XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
- XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
+alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
+ XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
+ XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
+ XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
+ XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
+ XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
+ XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
+ XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
- ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
- XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
- XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
- XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
- XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
- XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
- XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
- XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
+ ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
+ XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
+ XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
+ XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
+ XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
+ XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
+ XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
+ XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
+ ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
+ XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
+ XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
+ XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
+ XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
+ XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
+ XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
+ XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
+ XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
+ XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
+ XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
+ XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
+ XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
+ XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
+ XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
+ XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
- );
+ );
// flags allocation class should be last.
alloc_class chunk2(RFLAGS);
@@ -346,8 +1117,8 @@
// Singleton class for condition codes
reg_class int_flags(RFLAGS);
-// Class for all float registers
-reg_class float_reg(XMM0,
+// Class for pre evex float registers
+reg_class float_reg_legacy(XMM0,
XMM1,
XMM2,
XMM3,
@@ -367,8 +1138,47 @@
#endif
);
-// Class for all double registers
-reg_class double_reg(XMM0, XMM0b,
+// Class for evex float registers
+reg_class float_reg_evex(XMM0,
+ XMM1,
+ XMM2,
+ XMM3,
+ XMM4,
+ XMM5,
+ XMM6,
+ XMM7
+#ifdef _LP64
+ ,XMM8,
+ XMM9,
+ XMM10,
+ XMM11,
+ XMM12,
+ XMM13,
+ XMM14,
+ XMM15,
+ XMM16,
+ XMM17,
+ XMM18,
+ XMM19,
+ XMM20,
+ XMM21,
+ XMM22,
+ XMM23,
+ XMM24,
+ XMM25,
+ XMM26,
+ XMM27,
+ XMM28,
+ XMM29,
+ XMM30,
+ XMM31
+#endif
+ );
+
+reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
+
+// Class for pre evex double registers
+reg_class double_reg_legacy(XMM0, XMM0b,
XMM1, XMM1b,
XMM2, XMM2b,
XMM3, XMM3b,
@@ -388,8 +1198,47 @@
#endif
);
-// Class for all 32bit vector registers
-reg_class vectors_reg(XMM0,
+// Class for evex double registers
+reg_class double_reg_evex(XMM0, XMM0b,
+ XMM1, XMM1b,
+ XMM2, XMM2b,
+ XMM3, XMM3b,
+ XMM4, XMM4b,
+ XMM5, XMM5b,
+ XMM6, XMM6b,
+ XMM7, XMM7b
+#ifdef _LP64
+ ,XMM8, XMM8b,
+ XMM9, XMM9b,
+ XMM10, XMM10b,
+ XMM11, XMM11b,
+ XMM12, XMM12b,
+ XMM13, XMM13b,
+ XMM14, XMM14b,
+ XMM15, XMM15b,
+ XMM16, XMM16b,
+ XMM17, XMM17b,
+ XMM18, XMM18b,
+ XMM19, XMM19b,
+ XMM20, XMM20b,
+ XMM21, XMM21b,
+ XMM22, XMM22b,
+ XMM23, XMM23b,
+ XMM24, XMM24b,
+ XMM25, XMM25b,
+ XMM26, XMM26b,
+ XMM27, XMM27b,
+ XMM28, XMM28b,
+ XMM29, XMM29b,
+ XMM30, XMM30b,
+ XMM31, XMM31b
+#endif
+ );
+
+reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
+
+// Class for pre evex 32bit vector registers
+reg_class vectors_reg_legacy(XMM0,
XMM1,
XMM2,
XMM3,
@@ -409,8 +1258,47 @@
#endif
);
+// Class for evex 32bit vector registers
+reg_class vectors_reg_evex(XMM0,
+ XMM1,
+ XMM2,
+ XMM3,
+ XMM4,
+ XMM5,
+ XMM6,
+ XMM7
+#ifdef _LP64
+ ,XMM8,
+ XMM9,
+ XMM10,
+ XMM11,
+ XMM12,
+ XMM13,
+ XMM14,
+ XMM15,
+ XMM16,
+ XMM17,
+ XMM18,
+ XMM19,
+ XMM20,
+ XMM21,
+ XMM22,
+ XMM23,
+ XMM24,
+ XMM25,
+ XMM26,
+ XMM27,
+ XMM28,
+ XMM29,
+ XMM30,
+ XMM31
+#endif
+ );
+
+reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
+
// Class for all 64bit vector registers
-reg_class vectord_reg(XMM0, XMM0b,
+reg_class vectord_reg_legacy(XMM0, XMM0b,
XMM1, XMM1b,
XMM2, XMM2b,
XMM3, XMM3b,
@@ -430,8 +1318,47 @@
#endif
);
+// Class for all 64bit vector registers
+reg_class vectord_reg_evex(XMM0, XMM0b,
+ XMM1, XMM1b,
+ XMM2, XMM2b,
+ XMM3, XMM3b,
+ XMM4, XMM4b,
+ XMM5, XMM5b,
+ XMM6, XMM6b,
+ XMM7, XMM7b
+#ifdef _LP64
+ ,XMM8, XMM8b,
+ XMM9, XMM9b,
+ XMM10, XMM10b,
+ XMM11, XMM11b,
+ XMM12, XMM12b,
+ XMM13, XMM13b,
+ XMM14, XMM14b,
+ XMM15, XMM15b,
+ XMM16, XMM16b,
+ XMM17, XMM17b,
+ XMM18, XMM18b,
+ XMM19, XMM19b,
+ XMM20, XMM20b,
+ XMM21, XMM21b,
+ XMM22, XMM22b,
+ XMM23, XMM23b,
+ XMM24, XMM24b,
+ XMM25, XMM25b,
+ XMM26, XMM26b,
+ XMM27, XMM27b,
+ XMM28, XMM28b,
+ XMM29, XMM29b,
+ XMM30, XMM30b,
+ XMM31, XMM31b
+#endif
+ );
+
+reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
+
// Class for all 128bit vector registers
-reg_class vectorx_reg(XMM0, XMM0b, XMM0c, XMM0d,
+reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
XMM1, XMM1b, XMM1c, XMM1d,
XMM2, XMM2b, XMM2c, XMM2d,
XMM3, XMM3b, XMM3c, XMM3d,
@@ -451,8 +1378,47 @@
#endif
);
+// Class for all 128bit vector registers
+reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
+ XMM1, XMM1b, XMM1c, XMM1d,
+ XMM2, XMM2b, XMM2c, XMM2d,
+ XMM3, XMM3b, XMM3c, XMM3d,
+ XMM4, XMM4b, XMM4c, XMM4d,
+ XMM5, XMM5b, XMM5c, XMM5d,
+ XMM6, XMM6b, XMM6c, XMM6d,
+ XMM7, XMM7b, XMM7c, XMM7d
+#ifdef _LP64
+ ,XMM8, XMM8b, XMM8c, XMM8d,
+ XMM9, XMM9b, XMM9c, XMM9d,
+ XMM10, XMM10b, XMM10c, XMM10d,
+ XMM11, XMM11b, XMM11c, XMM11d,
+ XMM12, XMM12b, XMM12c, XMM12d,
+ XMM13, XMM13b, XMM13c, XMM13d,
+ XMM14, XMM14b, XMM14c, XMM14d,
+ XMM15, XMM15b, XMM15c, XMM15d,
+ XMM16, XMM16b, XMM16c, XMM16d,
+ XMM17, XMM17b, XMM17c, XMM17d,
+ XMM18, XMM18b, XMM18c, XMM18d,
+ XMM19, XMM19b, XMM19c, XMM19d,
+ XMM20, XMM20b, XMM20c, XMM20d,
+ XMM21, XMM21b, XMM21c, XMM21d,
+ XMM22, XMM22b, XMM22c, XMM22d,
+ XMM23, XMM23b, XMM23c, XMM23d,
+ XMM24, XMM24b, XMM24c, XMM24d,
+ XMM25, XMM25b, XMM25c, XMM25d,
+ XMM26, XMM26b, XMM26c, XMM26d,
+ XMM27, XMM27b, XMM27c, XMM27d,
+ XMM28, XMM28b, XMM28c, XMM28d,
+ XMM29, XMM29b, XMM29c, XMM29d,
+ XMM30, XMM30b, XMM30c, XMM30d,
+ XMM31, XMM31b, XMM31c, XMM31d
+#endif
+ );
+
+reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
+
// Class for all 256bit vector registers
-reg_class vectory_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
+reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
@@ -472,6 +1438,82 @@
#endif
);
+// Class for all 256bit vector registers
+reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
+ XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
+ XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
+ XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
+ XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
+ XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
+ XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
+ XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
+#ifdef _LP64
+ ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
+ XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
+ XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
+ XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
+ XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
+ XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
+ XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
+ XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
+ XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
+ XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
+ XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
+ XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
+ XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
+ XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
+ XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
+ XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
+ XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
+ XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
+ XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
+ XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
+ XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
+ XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
+ XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
+ XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
+#endif
+ );
+
+reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
+
+// Class for all 512bit vector registers
+reg_class vectorz_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
+ XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
+ XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
+ XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
+ XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
+ XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
+ XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
+ XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
+#ifdef _LP64
+ ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
+ XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
+ XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
+ XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
+ XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
+ XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
+ XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
+ XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
+ ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
+ XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
+ XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
+ XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
+ XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
+ XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
+ XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
+ XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
+ XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
+ XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
+ XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
+ XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
+ XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
+ XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
+ XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
+ XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
+#endif
+ );
+
%}
@@ -623,6 +1665,10 @@
if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
return false;
break;
+ case Op_MulVL:
+ case Op_MulReductionVL:
+ if (VM_Version::supports_avx512dq() == false)
+ return false;
case Op_AddReductionVL:
if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
return false;
@@ -657,10 +1703,11 @@
if (UseSSE < 2) return 0;
// SSE2 supports 128bit vectors for all types.
// AVX2 supports 256bit vectors for all types.
- int size = (UseAVX > 1) ? 32 : 16;
+ // AVX2/EVEX supports 512bit vectors for all types.
+ int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
// AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
- size = 32;
+ size = (UseAVX > 2) ? 64 : 32;
// Use flag to limit vector size.
size = MIN2(size,(int)MaxVectorSize);
// Minimum 2 values in vector (or 4 for bytes).
@@ -702,6 +1749,7 @@
case 8: return Op_VecD;
case 16: return Op_VecX;
case 32: return Op_VecY;
+ case 64: return Op_VecZ;
}
ShouldNotReachHere();
return 0;
@@ -745,6 +1793,9 @@
case Op_VecY:
__ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
break;
+ case Op_VecZ:
+ __ evmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
+ break;
default:
ShouldNotReachHere();
}
@@ -763,6 +1814,7 @@
st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
break;
case Op_VecY:
+ case Op_VecZ:
st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
break;
default:
@@ -771,7 +1823,7 @@
#endif
}
// VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
- return 4;
+ return (UseAVX > 2) ? 6 : 4;
}
static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
@@ -796,6 +1848,9 @@
case Op_VecY:
__ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
break;
+ case Op_VecZ:
+ __ evmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
+ break;
default:
ShouldNotReachHere();
}
@@ -813,13 +1868,16 @@
case Op_VecY:
__ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
break;
+ case Op_VecZ:
+ __ evmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
+ break;
default:
ShouldNotReachHere();
}
}
int size = __ offset() - offset;
#ifdef ASSERT
- int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
+ int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
// VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
assert(!do_size || size == (5+offset_size), "incorrect size calculattion");
#endif
@@ -838,6 +1896,7 @@
st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
break;
case Op_VecY:
+ case Op_VecZ:
st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
break;
default:
@@ -855,6 +1914,7 @@
st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
break;
case Op_VecY:
+ case Op_VecZ:
st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
break;
default:
@@ -863,7 +1923,7 @@
}
#endif
}
- int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
+ int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
// VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
return 5+offset_size;
}
@@ -952,40 +2012,15 @@
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.
-// Vectors
-operand vecS() %{
- constraint(ALLOC_IN_RC(vectors_reg));
- match(VecS);
-
- format %{ %}
- interface(REG_INTER);
-%}
-
-operand vecD() %{
- constraint(ALLOC_IN_RC(vectord_reg));
- match(VecD);
+// This one generically applies only for evex, so only one version
+operand vecZ() %{
+ constraint(ALLOC_IN_RC(vectorz_reg));
+ match(VecZ);
format %{ %}
interface(REG_INTER);
%}
-operand vecX() %{
- constraint(ALLOC_IN_RC(vectorx_reg));
- match(VecX);
-
- format %{ %}
- interface(REG_INTER);
-%}
-
-operand vecY() %{
- constraint(ALLOC_IN_RC(vectory_reg));
- match(VecY);
-
- format %{ %}
- interface(REG_INTER);
-%}
-
-
// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
// ============================================================================
@@ -1586,9 +2621,9 @@
ins_cost(150);
format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
ins_encode %{
- bool vector256 = false;
+ int vector_len = 0;
__ vandps($dst$$XMMRegister, $src$$XMMRegister,
- ExternalAddress(float_signmask()), vector256);
+ ExternalAddress(float_signmask()), vector_len);
%}
ins_pipe(pipe_slow);
%}
@@ -1612,9 +2647,9 @@
format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
"# abs double by sign masking" %}
ins_encode %{
- bool vector256 = false;
+ int vector_len = 0;
__ vandpd($dst$$XMMRegister, $src$$XMMRegister,
- ExternalAddress(double_signmask()), vector256);
+ ExternalAddress(double_signmask()), vector_len);
%}
ins_pipe(pipe_slow);
%}
@@ -1636,9 +2671,9 @@
ins_cost(150);
format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
ins_encode %{
- bool vector256 = false;
+ int vector_len = 0;
__ vxorps($dst$$XMMRegister, $src$$XMMRegister,
- ExternalAddress(float_signflip()), vector256);
+ ExternalAddress(float_signflip()), vector_len);
%}
ins_pipe(pipe_slow);
%}
@@ -1662,9 +2697,9 @@
format %{ "vxorpd $dst, $src, [0x8000000000000000]\t"
"# neg double by sign flipping" %}
ins_encode %{
- bool vector256 = false;
+ int vector_len = 0;
__ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
- ExternalAddress(double_signflip()), vector256);
+ ExternalAddress(double_signflip()), vector_len);
%}
ins_pipe(pipe_slow);
%}
@@ -1739,7 +2774,6 @@
ins_pipe(pipe_slow);
%}
-
// ====================VECTOR INSTRUCTIONS=====================================
// Load vectors (4 bytes long)
@@ -1790,6 +2824,19 @@
ins_pipe( pipe_slow );
%}
+// Load vectors (64 bytes long)
+instruct loadV64(vecZ dst, memory mem) %{
+ predicate(n->as_LoadVector()->memory_size() == 64);
+ match(Set dst (LoadVector mem));
+ ins_cost(125);
+ format %{ "vmovdqu $dst k0,$mem\t! load vector (64 bytes)" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ evmovdqu($dst$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
// Store vectors
instruct storeV4(memory mem, vecS src) %{
predicate(n->as_StoreVector()->memory_size() == 4);
@@ -1835,6 +2882,18 @@
ins_pipe( pipe_slow );
%}
+instruct storeV64(memory mem, vecZ src) %{
+ predicate(n->as_StoreVector()->memory_size() == 64);
+ match(Set mem (StoreVector mem src));
+ ins_cost(145);
+ format %{ "vmovdqu $mem k0,$src\t! store vector (64 bytes)" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ evmovdqu($mem$$Address, $src$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
// Replicate byte scalar to be vector
instruct Repl4B(vecS dst, rRegI src) %{
predicate(n->as_Vector()->length() == 4);
@@ -1898,6 +2957,26 @@
ins_pipe( pipe_slow );
%}
+instruct Repl64B(vecZ dst, rRegI src) %{
+ predicate(n->as_Vector()->length() == 64);
+ match(Set dst (ReplicateB src));
+ format %{ "movd $dst,$src\n\t"
+ "punpcklbw $dst,$dst\n\t"
+ "pshuflw $dst,$dst,0x00\n\t"
+ "punpcklqdq $dst,$dst\n\t"
+ "vinserti128h $dst,$dst,$dst\t! lower replicate32B\n\t"
+ "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate632B" %}
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+ __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
// Replicate byte scalar immediate to be vector by loading from const table.
instruct Repl4B_imm(vecS dst, immI con) %{
predicate(n->as_Vector()->length() == 4);
@@ -1945,6 +3024,22 @@
ins_pipe( pipe_slow );
%}
+instruct Repl64B_imm(vecZ dst, immI con) %{
+ predicate(n->as_Vector()->length() == 64);
+ match(Set dst (ReplicateB con));
+ format %{ "movq $dst,[$constantaddress]\n\t"
+ "punpcklqdq $dst,$dst\n\t"
+ "vinserti128h $dst,$dst,$dst\t! lower replicate32B($con)\n\t"
+ "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate32B($con)" %}
+ ins_encode %{
+ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
+ __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
// Replicate byte scalar zero to be vector
instruct Repl4B_zero(vecS dst, immI0 zero) %{
predicate(n->as_Vector()->length() == 4);
@@ -1982,8 +3077,20 @@
format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %}
ins_encode %{
// Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
- bool vector256 = true;
- __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl64B_zero(vecZ dst, immI0 zero) %{
+ predicate(n->as_Vector()->length() == 64);
+ match(Set dst (ReplicateB zero));
+ format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %}
+ ins_encode %{
+ // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
+ int vector_len = 2;
+ __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( fpu_reg_reg );
%}
@@ -2043,6 +3150,24 @@
ins_pipe( pipe_slow );
%}
+instruct Repl32S(vecZ dst, rRegI src) %{
+ predicate(n->as_Vector()->length() == 32);
+ match(Set dst (ReplicateS src));
+ format %{ "movd $dst,$src\n\t"
+ "pshuflw $dst,$dst,0x00\n\t"
+ "punpcklqdq $dst,$dst\n\t"
+ "vinserti128h $dst,$dst,$dst\t! lower replicate16S\n\t"
+ "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate16S" %}
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
instruct Repl2S_imm(vecS dst, immI con) %{
predicate(n->as_Vector()->length() == 2);
@@ -2090,6 +3215,22 @@
ins_pipe( pipe_slow );
%}
+instruct Repl32S_imm(vecZ dst, immI con) %{
+ predicate(n->as_Vector()->length() == 32);
+ match(Set dst (ReplicateS con));
+ format %{ "movq $dst,[$constantaddress]\n\t"
+ "punpcklqdq $dst,$dst\n\t"
+ "vinserti128h $dst,$dst,$dst\t! lower replicate16S($con)\n\t"
+ "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate16S($con)" %}
+ ins_encode %{
+ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
+ __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
// Replicate char/short (2 byte) scalar zero to be vector
instruct Repl2S_zero(vecS dst, immI0 zero) %{
predicate(n->as_Vector()->length() == 2);
@@ -2127,8 +3268,20 @@
format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
ins_encode %{
// Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
- bool vector256 = true;
- __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl32S_zero(vecZ dst, immI0 zero) %{
+ predicate(n->as_Vector()->length() == 32);
+ match(Set dst (ReplicateS zero));
+ format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %}
+ ins_encode %{
+ // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
+ int vector_len = 2;
+ __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( fpu_reg_reg );
%}
@@ -2172,6 +3325,22 @@
ins_pipe( pipe_slow );
%}
+instruct Repl16I(vecZ dst, rRegI src) %{
+ predicate(n->as_Vector()->length() == 16);
+ match(Set dst (ReplicateI src));
+ format %{ "movd $dst,$src\n\t"
+ "pshufd $dst,$dst,0x00\n\t"
+ "vinserti128h $dst,$dst,$dst\t! lower replicate8I\n\t"
+ "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate8I" %}
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
instruct Repl2I_imm(vecD dst, immI con) %{
predicate(n->as_Vector()->length() == 2);
@@ -2209,6 +3378,22 @@
ins_pipe( pipe_slow );
%}
+instruct Repl16I_imm(vecZ dst, immI con) %{
+ predicate(n->as_Vector()->length() == 16);
+ match(Set dst (ReplicateI con));
+ format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t"
+ "punpcklqdq $dst,$dst\n\t"
+ "vinserti128h $dst,$dst,$dst\n\t"
+ "vinserti64x4h $dst k0,$dst,$dst" %}
+ ins_encode %{
+ __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
+ __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
// Integer could be loaded into xmm register directly from memory.
instruct Repl2I_mem(vecD dst, memory mem) %{
predicate(n->as_Vector()->length() == 2);
@@ -2248,6 +3433,22 @@
ins_pipe( pipe_slow );
%}
+instruct Repl16I_mem(vecZ dst, memory mem) %{
+ predicate(n->as_Vector()->length() == 16);
+ match(Set dst (ReplicateI (LoadI mem)));
+ format %{ "movd $dst,$mem\n\t"
+ "pshufd $dst,$dst,0x00\n\t"
+ "vinserti128h $dst,$dst,$dst\t! lower replicate8I\n\t"
+ "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate8I" %}
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $mem$$Address);
+ __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
// Replicate integer (4 byte) scalar zero to be vector
instruct Repl2I_zero(vecD dst, immI0 zero) %{
predicate(n->as_Vector()->length() == 2);
@@ -2275,8 +3476,20 @@
format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
ins_encode %{
// Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
- bool vector256 = true;
- __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl16I_zero(vecZ dst, immI0 zero) %{
+ predicate(n->as_Vector()->length() == 16);
+ match(Set dst (ReplicateI zero));
+ format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %}
+ ins_encode %{
+ // Use vxorpd since AVX does not have vpxor for 512-bit (AVX2 will have it).
+ int vector_len = 2;
+ __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( fpu_reg_reg );
%}
@@ -2308,6 +3521,22 @@
%}
ins_pipe( pipe_slow );
%}
+
+instruct Repl8L(vecZ dst, rRegL src) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (ReplicateL src));
+ format %{ "movdq $dst,$src\n\t"
+ "punpcklqdq $dst,$dst\n\t"
+ "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t"
+ "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %}
+ ins_encode %{
+ __ movdq($dst$$XMMRegister, $src$$Register);
+ __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
#else // _LP64
instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
predicate(n->as_Vector()->length() == 2);
@@ -2344,6 +3573,26 @@
%}
ins_pipe( pipe_slow );
%}
+
+instruct Repl8L(vecZ dst, eRegL src, regD tmp) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (ReplicateL src));
+ effect(TEMP dst, USE src, TEMP tmp);
+ format %{ "movdl $dst,$src.lo\n\t"
+ "movdl $tmp,$src.hi\n\t"
+ "punpckldq $dst,$tmp\n\t"
+ "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t"
+ "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %}
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
+ __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
+ __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
#endif // _LP64
// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
@@ -2373,6 +3622,22 @@
ins_pipe( pipe_slow );
%}
+instruct Repl8L_imm(vecZ dst, immL con) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (ReplicateL con));
+ format %{ "movq $dst,[$constantaddress]\n\t"
+ "punpcklqdq $dst,$dst\n\t"
+ "vinserti128h $dst,$dst,$dst\t! lower replicate4L($con)\n\t"
+ "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L($con)" %}
+ ins_encode %{
+ __ movq($dst$$XMMRegister, $constantaddress($con));
+ __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
// Long could be loaded into xmm register directly from memory.
instruct Repl2L_mem(vecX dst, memory mem) %{
predicate(n->as_Vector()->length() == 2);
@@ -2400,6 +3665,22 @@
ins_pipe( pipe_slow );
%}
+instruct Repl8L_mem(vecZ dst, memory mem) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (ReplicateL (LoadL mem)));
+ format %{ "movq $dst,$mem\n\t"
+ "punpcklqdq $dst,$dst\n\t"
+ "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t"
+ "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %}
+ ins_encode %{
+ __ movq($dst$$XMMRegister, $mem$$Address);
+ __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
// Replicate long (8 byte) scalar zero to be vector
instruct Repl2L_zero(vecX dst, immL0 zero) %{
predicate(n->as_Vector()->length() == 2);
@@ -2417,8 +3698,20 @@
format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
ins_encode %{
// Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
- bool vector256 = true;
- __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl8L_zero(vecZ dst, immL0 zero) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (ReplicateL zero));
+ format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %}
+ ins_encode %{
+ // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
+ int vector_len = 2;
+ __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( fpu_reg_reg );
%}
@@ -2456,6 +3749,20 @@
ins_pipe( pipe_slow );
%}
+instruct Repl16F(vecZ dst, regF src) %{
+ predicate(n->as_Vector()->length() == 16);
+ match(Set dst (ReplicateF src));
+ format %{ "pshufd $dst,$src,0x00\n\t"
+ "vinsertf128h $dst,$dst,$dst\t! lower replicate8F\n\t"
+ "vinsertf64x4h $dst k0,$dst,$dst\t! lower replicate8F" %}
+ ins_encode %{
+ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+ __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinsertf64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
// Replicate float (4 byte) scalar zero to be vector
instruct Repl2F_zero(vecD dst, immF0 zero) %{
predicate(n->as_Vector()->length() == 2);
@@ -2482,8 +3789,19 @@
match(Set dst (ReplicateF zero));
format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %}
ins_encode %{
- bool vector256 = true;
- __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl16F_zero(vecZ dst, immF0 zero) %{
+ predicate(n->as_Vector()->length() == 16);
+ match(Set dst (ReplicateF zero));
+ format %{ "vxorps $dst k0,$dst,$dst\t! replicate16F zero" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( fpu_reg_reg );
%}
@@ -2511,6 +3829,20 @@
ins_pipe( pipe_slow );
%}
+instruct Repl8D(vecZ dst, regD src) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (ReplicateD src));
+ format %{ "pshufd $dst,$src,0x44\n\t"
+ "vinsertf128h $dst,$dst,$dst\t! lower replicate4D\n\t"
+ "vinsertf64x4h $dst k0,$dst,$dst\t! upper replicate4D" %}
+ ins_encode %{
+ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
+ __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinsertf64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
// Replicate double (8 byte) scalar zero to be vector
instruct Repl2D_zero(vecX dst, immD0 zero) %{
predicate(n->as_Vector()->length() == 2);
@@ -2527,8 +3859,19 @@
match(Set dst (ReplicateD zero));
format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %}
ins_encode %{
- bool vector256 = true;
- __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl8D_zero(vecZ dst, immD0 zero) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (ReplicateD zero));
+ format %{ "vxorpd $dst k0,$dst,$dst,vect512\t! replicate8D zero" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( fpu_reg_reg );
%}
@@ -2555,17 +3898,38 @@
%}
instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
- predicate(UseAVX > 0);
+ predicate(UseAVX > 0 && UseAVX < 3);
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
- format %{ "vphaddd $tmp,$src2,$src2\n\t"
+ format %{ "vphaddd $tmp,$src2,$src2\n\t"
+ "movd $tmp2,$src1\n\t"
+ "vpaddd $tmp2,$tmp2,$tmp\n\t"
+ "movd $dst,$tmp2\t! add reduction2I" %}
+ ins_encode %{
+ int vector_len = 0;
+ __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
+ __ movdl($tmp2$$XMMRegister, $src1$$Register);
+ __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
+ __ movdl($dst$$Register, $tmp2$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
+ predicate(UseAVX > 2);
+ match(Set dst (AddReductionVI src1 src2));
+ effect(TEMP tmp, TEMP tmp2);
+ format %{ "pshufd $tmp2,$src2,0x1\n\t"
+ "vpaddd $tmp,$src2,$tmp2\n\t"
"movd $tmp2,$src1\n\t"
- "vpaddd $tmp2,$tmp2,$tmp\n\t"
+ "vpaddd $tmp2,$tmp,$tmp2\n\t"
"movd $dst,$tmp2\t! add reduction2I" %}
ins_encode %{
- __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, false);
+ int vector_len = 0;
+ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+ __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
__ movdl($tmp2$$XMMRegister, $src1$$Register);
- __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, false);
+ __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
__ movdl($dst$$Register, $tmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
@@ -2593,47 +3957,203 @@
%}
instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
- predicate(UseAVX > 0);
+ predicate(UseAVX > 0 && UseAVX < 3);
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
- format %{ "vphaddd $tmp,$src2,$src2\n\t"
- "vphaddd $tmp,$tmp,$tmp2\n\t"
+ format %{ "vphaddd $tmp,$src2,$src2\n\t"
+ "vphaddd $tmp,$tmp,$tmp2\n\t"
+ "movd $tmp2,$src1\n\t"
+ "vpaddd $tmp2,$tmp2,$tmp\n\t"
+ "movd $dst,$tmp2\t! add reduction4I" %}
+ ins_encode %{
+ int vector_len = 0;
+ __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
+ __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
+ __ movdl($tmp2$$XMMRegister, $src1$$Register);
+ __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
+ __ movdl($dst$$Register, $tmp2$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
+ predicate(UseAVX > 2);
+ match(Set dst (AddReductionVI src1 src2));
+ effect(TEMP tmp, TEMP tmp2);
+ format %{ "pshufd $tmp2,$src2,0xE\n\t"
+ "vpaddd $tmp,$src2,$tmp2\n\t"
+ "pshufd $tmp2,$tmp,0x1\n\t"
+ "vpaddd $tmp,$tmp,$tmp2\n\t"
"movd $tmp2,$src1\n\t"
- "vpaddd $tmp2,$tmp2,$tmp\n\t"
+ "vpaddd $tmp2,$tmp,$tmp2\n\t"
"movd $dst,$tmp2\t! add reduction4I" %}
ins_encode %{
- __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, false);
- __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
+ int vector_len = 0;
+ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
+ __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
+ __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
+ __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
__ movdl($tmp2$$XMMRegister, $src1$$Register);
- __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, false);
+ __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
__ movdl($dst$$Register, $tmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
- predicate(UseAVX > 0);
+ predicate(UseAVX > 0 && UseAVX < 3);
+ match(Set dst (AddReductionVI src1 src2));
+ effect(TEMP tmp, TEMP tmp2);
+ format %{ "vphaddd $tmp,$src2,$src2\n\t"
+ "vphaddd $tmp,$tmp,$tmp2\n\t"
+ "vextracti128 $tmp2,$tmp\n\t"
+ "vpaddd $tmp,$tmp,$tmp2\n\t"
+ "movd $tmp2,$src1\n\t"
+ "vpaddd $tmp2,$tmp2,$tmp\n\t"
+ "movd $dst,$tmp2\t! add reduction8I" %}
+ ins_encode %{
+ int vector_len = 1;
+ __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
+ __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
+ __ vextracti128h($tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
+ __ movdl($tmp2$$XMMRegister, $src1$$Register);
+ __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
+ __ movdl($dst$$Register, $tmp2$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
+ predicate(UseAVX > 2);
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
- format %{ "vphaddd $tmp,$src2,$src2\n\t"
- "vphaddd $tmp,$tmp,$tmp2\n\t"
- "vextractf128 $tmp2,$tmp\n\t"
+ format %{ "vextracti128 $tmp,$src2\n\t"
+ "vpaddd $tmp,$tmp,$src2\n\t"
+ "pshufd $tmp2,$tmp,0xE\n\t"
+ "vpaddd $tmp,$tmp,$tmp2\n\t"
+ "pshufd $tmp2,$tmp,0x1\n\t"
+ "vpaddd $tmp,$tmp,$tmp2\n\t"
+ "movd $tmp2,$src1\n\t"
+ "vpaddd $tmp2,$tmp,$tmp2\n\t"
+ "movd $dst,$tmp2\t! add reduction8I" %}
+ ins_encode %{
+ int vector_len = 0;
+ __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
+ __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
+ __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
+ __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
+ __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
+ __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
+ __ movdl($tmp2$$XMMRegister, $src1$$Register);
+ __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
+ __ movdl($dst$$Register, $tmp2$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
+ predicate(UseAVX > 2);
+ match(Set dst (AddReductionVI src1 src2));
+ effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
+ format %{ "vextracti64x4 $tmp3,$src2\n\t"
+ "vpaddd $tmp3,$tmp3,$src2\n\t"
+ "vextracti128 $tmp,$tmp3\n\t"
+ "vpaddd $tmp,$tmp,$tmp3\n\t"
+ "pshufd $tmp2,$tmp,0xE\n\t"
+ "vpaddd $tmp,$tmp,$tmp2\n\t"
+ "pshufd $tmp2,$tmp,0x1\n\t"
"vpaddd $tmp,$tmp,$tmp2\n\t"
"movd $tmp2,$src1\n\t"
- "vpaddd $tmp2,$tmp2,$tmp\n\t"
- "movd $dst,$tmp2\t! add reduction8I" %}
- ins_encode %{
- __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, true);
- __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, true);
- __ vextractf128h($tmp2$$XMMRegister, $tmp$$XMMRegister);
- __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
+ "vpaddd $tmp2,$tmp,$tmp2\n\t"
+ "movd $dst,$tmp2\t! mul reduction16I" %}
+ ins_encode %{
+ __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister);
+ __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
+ __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
+ __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
+ __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
+ __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
+ __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
+ __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
__ movdl($tmp2$$XMMRegister, $src1$$Register);
- __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, false);
+ __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
__ movdl($dst$$Register, $tmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
+#ifdef _LP64
+instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
+ predicate(UseAVX > 2);
+ match(Set dst (AddReductionVL src1 src2));
+ effect(TEMP tmp, TEMP tmp2);
+ format %{ "pshufd $tmp2,$src2,0xE\n\t"
+ "vpaddq $tmp,$src2,$tmp2\n\t"
+ "movdq $tmp2,$src1\n\t"
+ "vpaddq $tmp2,$tmp,$tmp2\n\t"
+ "movdq $dst,$tmp2\t! add reduction2L" %}
+ ins_encode %{
+ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
+ __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
+ __ movdq($tmp2$$XMMRegister, $src1$$Register);
+ __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
+ __ movdq($dst$$Register, $tmp2$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
+ predicate(UseAVX > 2);
+ match(Set dst (AddReductionVL src1 src2));
+ effect(TEMP tmp, TEMP tmp2);
+ format %{ "vextracti64x2 $tmp,$src2, 0x1\n\t"
+ "vpaddq $tmp2,$tmp,$src2\n\t"
+ "pshufd $tmp,$tmp2,0xE\n\t"
+ "vpaddq $tmp2,$tmp2,$tmp\n\t"
+ "movdq $tmp,$src1\n\t"
+ "vpaddq $tmp2,$tmp2,$tmp\n\t"
+ "movdq $dst,$tmp2\t! add reduction4L" %}
+ ins_encode %{
+ __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
+ __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
+ __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
+ __ movdq($tmp$$XMMRegister, $src1$$Register);
+ __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
+ __ movdq($dst$$Register, $tmp2$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
+ predicate(UseAVX > 2);
+ match(Set dst (AddReductionVL src1 src2));
+ effect(TEMP tmp, TEMP tmp2);
+ format %{ "vextracti64x4 $tmp2,$src2\n\t"
+ "vpaddq $tmp2,$tmp2,$src2\n\t"
+ "vextracti128 $tmp,$tmp2\n\t"
+ "vpaddq $tmp2,$tmp2,$tmp\n\t"
+ "pshufd $tmp,$tmp2,0xE\n\t"
+ "vpaddq $tmp2,$tmp2,$tmp\n\t"
+ "movdq $tmp,$src1\n\t"
+ "vpaddq $tmp2,$tmp2,$tmp\n\t"
+ "movdq $dst,$tmp2\t! add reduction8L" %}
+ ins_encode %{
+ __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister);
+ __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
+ __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
+ __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
+ __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
+ __ movdq($tmp$$XMMRegister, $src1$$Register);
+ __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
+ __ movdq($dst$$Register, $tmp2$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+#endif
+
instruct rsadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
predicate(UseSSE >= 1 && UseAVX == 0);
match(Set dst (AddReductionVF src1 src2));
@@ -2757,6 +4277,77 @@
ins_pipe( pipe_slow );
%}
+instruct radd16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
+ predicate(UseAVX > 2);
+ match(Set dst (AddReductionVF src1 src2));
+ effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
+ format %{ "vaddss $tmp2,$src1,$src2\n\t"
+ "pshufd $tmp,$src2,0x01\n\t"
+ "vaddss $tmp2,$tmp2,$tmp\n\t"
+ "pshufd $tmp,$src2,0x02\n\t"
+ "vaddss $tmp2,$tmp2,$tmp\n\t"
+ "pshufd $tmp,$src2,0x03\n\t"
+ "vaddss $tmp2,$tmp2,$tmp\n\t"
+ "vextractf64x2 $tmp3,$src2, 0x1\n\t"
+ "vaddss $tmp2,$tmp2,$tmp3\n\t"
+ "pshufd $tmp,$tmp3,0x01\n\t"
+ "vaddss $tmp2,$tmp2,$tmp\n\t"
+ "pshufd $tmp,$tmp3,0x02\n\t"
+ "vaddss $tmp2,$tmp2,$tmp\n\t"
+ "pshufd $tmp,$tmp3,0x03\n\t"
+ "vaddss $tmp2,$tmp2,$tmp\n\t"
+ "vextractf64x2 $tmp3,$src2, 0x2\n\t"
+ "vaddss $tmp2,$tmp2,$tmp3\n\t"
+ "pshufd $tmp,$tmp3,0x01\n\t"
+ "vaddss $tmp2,$tmp2,$tmp\n\t"
+ "pshufd $tmp,$tmp3,0x02\n\t"
+ "vaddss $tmp2,$tmp2,$tmp\n\t"
+ "pshufd $tmp,$tmp3,0x03\n\t"
+ "vaddss $tmp2,$tmp2,$tmp\n\t"
+ "vextractf64x2 $tmp3,$src2, 0x3\n\t"
+ "vaddss $tmp2,$tmp2,$tmp3\n\t"
+ "pshufd $tmp,$tmp3,0x01\n\t"
+ "vaddss $tmp2,$tmp2,$tmp\n\t"
+ "pshufd $tmp,$tmp3,0x02\n\t"
+ "vaddss $tmp2,$tmp2,$tmp\n\t"
+ "pshufd $tmp,$tmp3,0x03\n\t"
+ "vaddss $dst,$tmp2,$tmp\t! add reduction16F" %}
+ ins_encode %{
+ __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
+ __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
+ __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
+ __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
+ __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
+ __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
+ __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
+ __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
+ __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
+ __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
+ __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
+ __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
+ __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
+ __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
+ __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
+ __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
instruct rsadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
predicate(UseSSE >= 1 && UseAVX == 0);
match(Set dst (AddReductionVD src1 src2));
@@ -2812,6 +4403,45 @@
ins_pipe( pipe_slow );
%}
+instruct rvadd8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{
+ predicate(UseAVX > 2);
+ match(Set dst (AddReductionVD src1 src2));
+ effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
+ format %{ "vaddsd $tmp2,$src1,$src2\n\t"
+ "pshufd $tmp,$src2,0xE\n\t"
+ "vaddsd $tmp2,$tmp2,$tmp\n\t"
+ "vextractf64x2 $tmp3,$src2, 0x1\n\t"
+ "vaddsd $tmp2,$tmp2,$tmp3\n\t"
+ "pshufd $tmp,$tmp3,0xE\n\t"
+ "vaddsd $tmp2,$tmp2,$tmp\n\t"
+ "vextractf64x2 $tmp3,$src2, 0x2\n\t"
+ "vaddsd $tmp2,$tmp2,$tmp3\n\t"
+ "pshufd $tmp,$tmp3,0xE\n\t"
+ "vaddsd $tmp2,$tmp2,$tmp\n\t"
+ "vextractf64x2 $tmp3,$src2, 0x3\n\t"
+ "vaddsd $tmp2,$tmp2,$tmp3\n\t"
+ "pshufd $tmp,$tmp3,0xE\n\t"
+ "vaddsd $dst,$tmp2,$tmp\t! add reduction8D" %}
+ ins_encode %{
+ __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
+ __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
+ __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
+ __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
+ __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
+ __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
+ __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
+ __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
predicate(UseSSE > 3 && UseAVX == 0);
match(Set dst (MulReductionVI src1 src2));
@@ -2835,16 +4465,17 @@
predicate(UseAVX > 0);
match(Set dst (MulReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
- format %{ "pshufd $tmp2,$src2,0x1\n\t"
- "vpmulld $tmp,$src2,$tmp2\n\t"
- "movd $tmp2,$src1\n\t"
- "vpmulld $tmp2,$tmp,$tmp2\n\t"
- "movd $dst,$tmp2\t! mul reduction2I" %}
- ins_encode %{
+ format %{ "pshufd $tmp2,$src2,0x1\n\t"
+ "vpmulld $tmp,$src2,$tmp2\n\t"
+ "movd $tmp2,$src1\n\t"
+ "vpmulld $tmp2,$tmp,$tmp2\n\t"
+ "movd $dst,$tmp2\t! mul reduction2I" %}
+ ins_encode %{
+ int vector_len = 0;
__ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
- __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, false);
+ __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
__ movdl($tmp2$$XMMRegister, $src1$$Register);
- __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
+ __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
__ movdl($dst$$Register, $tmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
@@ -2877,20 +4508,21 @@
predicate(UseAVX > 0);
match(Set dst (MulReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
- format %{ "pshufd $tmp2,$src2,0xE\n\t"
- "vpmulld $tmp,$src2,$tmp2\n\t"
- "pshufd $tmp2,$tmp,0x1\n\t"
- "vpmulld $tmp,$tmp,$tmp2\n\t"
- "movd $tmp2,$src1\n\t"
- "vpmulld $tmp2,$tmp,$tmp2\n\t"
- "movd $dst,$tmp2\t! mul reduction4I" %}
- ins_encode %{
+ format %{ "pshufd $tmp2,$src2,0xE\n\t"
+ "vpmulld $tmp,$src2,$tmp2\n\t"
+ "pshufd $tmp2,$tmp,0x1\n\t"
+ "vpmulld $tmp,$tmp,$tmp2\n\t"
+ "movd $tmp2,$src1\n\t"
+ "vpmulld $tmp2,$tmp,$tmp2\n\t"
+ "movd $dst,$tmp2\t! mul reduction4I" %}
+ ins_encode %{
+ int vector_len = 0;
__ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
- __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, false);
+ __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
- __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
+ __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
__ movdl($tmp2$$XMMRegister, $src1$$Register);
- __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
+ __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
__ movdl($dst$$Register, $tmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
@@ -2900,30 +4532,133 @@
predicate(UseAVX > 0);
match(Set dst (MulReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
- format %{ "vextractf128 $tmp,$src2\n\t"
- "vpmulld $tmp,$tmp,$src2\n\t"
- "pshufd $tmp2,$tmp,0xE\n\t"
- "vpmulld $tmp,$tmp,$tmp2\n\t"
- "pshufd $tmp2,$tmp,0x1\n\t"
- "vpmulld $tmp,$tmp,$tmp2\n\t"
- "movd $tmp2,$src1\n\t"
- "vpmulld $tmp2,$tmp,$tmp2\n\t"
- "movd $dst,$tmp2\t! mul reduction8I" %}
- ins_encode %{
- __ vextractf128h($tmp$$XMMRegister, $src2$$XMMRegister);
- __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, false);
+ format %{ "vextracti128 $tmp,$src2\n\t"
+ "vpmulld $tmp,$tmp,$src2\n\t"
+ "pshufd $tmp2,$tmp,0xE\n\t"
+ "vpmulld $tmp,$tmp,$tmp2\n\t"
+ "pshufd $tmp2,$tmp,0x1\n\t"
+ "vpmulld $tmp,$tmp,$tmp2\n\t"
+ "movd $tmp2,$src1\n\t"
+ "vpmulld $tmp2,$tmp,$tmp2\n\t"
+ "movd $dst,$tmp2\t! mul reduction8I" %}
+ ins_encode %{
+ int vector_len = 0;
+ __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
+ __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
+ __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
+ __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
+ __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
+ __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
+ __ movdl($tmp2$$XMMRegister, $src1$$Register);
+ __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
+ __ movdl($dst$$Register, $tmp2$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
+ predicate(UseAVX > 2);
+ match(Set dst (MulReductionVI src1 src2));
+ effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
+ format %{ "vextracti64x4 $tmp3,$src2\n\t"
+ "vpmulld $tmp3,$tmp3,$src2\n\t"
+ "vextracti128 $tmp,$tmp3\n\t"
+ "vpmulld $tmp,$tmp,$src2\n\t"
+ "pshufd $tmp2,$tmp,0xE\n\t"
+ "vpmulld $tmp,$tmp,$tmp2\n\t"
+ "pshufd $tmp2,$tmp,0x1\n\t"
+ "vpmulld $tmp,$tmp,$tmp2\n\t"
+ "movd $tmp2,$src1\n\t"
+ "vpmulld $tmp2,$tmp,$tmp2\n\t"
+ "movd $dst,$tmp2\t! mul reduction16I" %}
+ ins_encode %{
+ __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister);
+ __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
+ __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
+ __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
- __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
+ __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
- __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
+ __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
__ movdl($tmp2$$XMMRegister, $src1$$Register);
- __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
+ __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
__ movdl($dst$$Register, $tmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct rsmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
+#ifdef _LP64
+instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
+ predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
+ match(Set dst (MulReductionVL src1 src2));
+ effect(TEMP tmp, TEMP tmp2);
+ format %{ "pshufd $tmp2,$src2,0xE\n\t"
+ "vpmullq $tmp,$src2,$tmp2\n\t"
+ "movdq $tmp2,$src1\n\t"
+ "vpmullq $tmp2,$tmp,$tmp2\n\t"
+ "movdq $dst,$tmp2\t! mul reduction2L" %}
+ ins_encode %{
+ __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
+ __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
+ __ movdq($tmp2$$XMMRegister, $src1$$Register);
+ __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
+ __ movdq($dst$$Register, $tmp2$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
+ predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
+ match(Set dst (MulReductionVL src1 src2));
+ effect(TEMP tmp, TEMP tmp2);
+ format %{ "vextracti64x2 $tmp,$src2, 0x1\n\t"
+ "vpmullq $tmp2,$tmp,$src2\n\t"
+ "pshufd $tmp,$tmp2,0xE\n\t"
+ "vpmullq $tmp2,$tmp2,$tmp\n\t"
+ "movdq $tmp,$src1\n\t"
+ "vpmullq $tmp2,$tmp2,$tmp\n\t"
+ "movdq $dst,$tmp2\t! mul reduction4L" %}
+ ins_encode %{
+ __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
+ __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
+ __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
+ __ movdq($tmp$$XMMRegister, $src1$$Register);
+ __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
+ __ movdq($dst$$Register, $tmp2$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
+ predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
+ match(Set dst (MulReductionVL src1 src2));
+ effect(TEMP tmp, TEMP tmp2);
+ format %{ "vextracti64x4 $tmp2,$src2\n\t"
+ "vpmullq $tmp2,$tmp2,$src2\n\t"
+ "vextracti128 $tmp,$tmp2\n\t"
+ "vpmullq $tmp2,$tmp2,$tmp\n\t"
+ "pshufd $tmp,$tmp2,0xE\n\t"
+ "vpmullq $tmp2,$tmp2,$tmp\n\t"
+ "movdq $tmp,$src1\n\t"
+ "vpmullq $tmp2,$tmp2,$tmp\n\t"
+ "movdq $dst,$tmp2\t! mul reduction8L" %}
+ ins_encode %{
+ __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister);
+ __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
+ __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
+ __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
+ __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
+ __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
+ __ movdq($tmp$$XMMRegister, $src1$$Register);
+ __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
+ __ movdq($dst$$Register, $tmp2$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+#endif
+
+instruct rsmul2F_reduction(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
predicate(UseSSE >= 1 && UseAVX == 0);
match(Set dst (MulReductionVF src1 src2));
effect(TEMP tmp, TEMP tmp2);
@@ -2931,7 +4666,7 @@
"mulss $tmp,$src2\n\t"
"pshufd $tmp2,$src2,0x01\n\t"
"mulss $tmp,$tmp2\n\t"
- "movdqu $dst,$tmp\t! add reduction2F" %}
+ "movdqu $dst,$tmp\t! mul reduction2F" %}
ins_encode %{
__ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
__ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
@@ -2948,7 +4683,7 @@
effect(TEMP tmp, TEMP tmp2);
format %{ "vmulss $tmp2,$src1,$src2\n\t"
"pshufd $tmp,$src2,0x01\n\t"
- "vmulss $dst,$tmp2,$tmp\t! add reduction2F" %}
+ "vmulss $dst,$tmp2,$tmp\t! mul reduction2F" %}
ins_encode %{
__ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
@@ -2969,7 +4704,7 @@
"mulss $tmp,$tmp2\n\t"
"pshufd $tmp2,$src2,0x03\n\t"
"mulss $tmp,$tmp2\n\t"
- "movdqu $dst,$tmp\t! add reduction4F" %}
+ "movdqu $dst,$tmp\t! mul reduction4F" %}
ins_encode %{
__ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
__ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
@@ -2994,7 +4729,7 @@
"pshufd $tmp,$src2,0x02\n\t"
"vmulss $tmp2,$tmp2,$tmp\n\t"
"pshufd $tmp,$src2,0x03\n\t"
- "vmulss $dst,$tmp2,$tmp\t! add reduction4F" %}
+ "vmulss $dst,$tmp2,$tmp\t! mul reduction4F" %}
ins_encode %{
__ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
@@ -3046,6 +4781,77 @@
ins_pipe( pipe_slow );
%}
+instruct rvmul16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
+ predicate(UseAVX > 2);
+ match(Set dst (MulReductionVF src1 src2));
+ effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
+ format %{ "vmulss $tmp2,$src1,$src2\n\t"
+ "pshufd $tmp,$src2,0x01\n\t"
+ "vmulss $tmp2,$tmp2,$tmp\n\t"
+ "pshufd $tmp,$src2,0x02\n\t"
+ "vmulss $tmp2,$tmp2,$tmp\n\t"
+ "pshufd $tmp,$src2,0x03\n\t"
+ "vmulss $tmp2,$tmp2,$tmp\n\t"
+ "vextractf32x4 $tmp3,$src2, 0x1\n\t"
+ "vmulss $tmp2,$tmp2,$tmp3\n\t"
+ "pshufd $tmp,$tmp3,0x01\n\t"
+ "vmulss $tmp2,$tmp2,$tmp\n\t"
+ "pshufd $tmp,$tmp3,0x02\n\t"
+ "vmulss $tmp2,$tmp2,$tmp\n\t"
+ "pshufd $tmp,$tmp3,0x03\n\t"
+ "vmulss $tmp2,$tmp2,$tmp\n\t"
+ "vextractf32x4 $tmp3,$src2, 0x2\n\t"
+ "vmulss $tmp2,$tmp2,$tmp3\n\t"
+ "pshufd $tmp,$tmp3,0x01\n\t"
+ "vmulss $tmp2,$tmp2,$tmp\n\t"
+ "pshufd $tmp,$tmp3,0x02\n\t"
+ "vmulss $tmp2,$tmp2,$tmp\n\t"
+ "pshufd $tmp,$tmp3,0x03\n\t"
+ "vmulss $tmp2,$tmp2,$tmp\n\t"
+ "vextractf32x4 $tmp3,$src2, 0x3\n\t"
+ "vmulss $tmp2,$tmp2,$tmp3\n\t"
+ "pshufd $tmp,$tmp3,0x01\n\t"
+ "vmulss $tmp2,$tmp2,$tmp\n\t"
+ "pshufd $tmp,$tmp3,0x02\n\t"
+ "vmulss $tmp2,$tmp2,$tmp\n\t"
+ "pshufd $tmp,$tmp3,0x03\n\t"
+ "vmulss $dst,$tmp2,$tmp\t! mul reduction16F" %}
+ ins_encode %{
+ __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
+ __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
+ __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
+ __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
+ __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
+ __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
+ __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
+ __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
+ __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
+ __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
+ __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
+ __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
+ __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
+ __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
+ __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
+ __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
instruct rsmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
predicate(UseSSE >= 1 && UseAVX == 0);
match(Set dst (MulReductionVD src1 src2));
@@ -3053,7 +4859,7 @@
format %{ "movdqu $tmp,$src1\n\t"
"mulsd $tmp,$src2\n\t"
"pshufd $dst,$src2,0xE\n\t"
- "mulsd $dst,$tmp\t! add reduction2D" %}
+ "mulsd $dst,$tmp\t! mul reduction2D" %}
ins_encode %{
__ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
__ mulsd($tmp$$XMMRegister, $src2$$XMMRegister);
@@ -3101,6 +4907,45 @@
ins_pipe( pipe_slow );
%}
+instruct rvmul8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{
+ predicate(UseAVX > 2);
+ match(Set dst (MulReductionVD src1 src2));
+ effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
+ format %{ "vmulsd $tmp2,$src1,$src2\n\t"
+ "pshufd $tmp,$src2,0xE\n\t"
+ "vmulsd $tmp2,$tmp2,$tmp\n\t"
+ "vextractf64x2 $tmp3,$src2, 0x1\n\t"
+ "vmulsd $tmp2,$tmp2,$tmp3\n\t"
+ "pshufd $tmp,$src2,0xE\n\t"
+ "vmulsd $tmp2,$tmp2,$tmp\n\t"
+ "vextractf64x2 $tmp3,$src2, 0x2\n\t"
+ "vmulsd $tmp2,$tmp2,$tmp3\n\t"
+ "pshufd $tmp,$tmp3,0xE\n\t"
+ "vmulsd $tmp2,$tmp2,$tmp\n\t"
+ "vextractf64x2 $tmp3,$src2, 0x3\n\t"
+ "vmulsd $tmp2,$tmp2,$tmp3\n\t"
+ "pshufd $tmp,$tmp3,0xE\n\t"
+ "vmulsd $dst,$tmp2,$tmp\t! mul reduction8D" %}
+ ins_encode %{
+ __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
+ __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
+ __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
+ __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
+ __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
+ __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
+ __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
+ __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
+ __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
// ====================VECTOR ARITHMETIC=======================================
// --------------------------------- ADD --------------------------------------
@@ -3121,8 +4966,8 @@
match(Set dst (AddVB src1 src2));
format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
ins_encode %{
- bool vector256 = false;
- __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3142,8 +4987,8 @@
match(Set dst (AddVB src1 src2));
format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
ins_encode %{
- bool vector256 = false;
- __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3163,8 +5008,8 @@
match(Set dst (AddVB src1 src2));
format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
ins_encode %{
- bool vector256 = false;
- __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3174,8 +5019,8 @@
match(Set dst (AddVB src (LoadVector mem)));
format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
ins_encode %{
- bool vector256 = false;
- __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 0;
+ __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3185,8 +5030,8 @@
match(Set dst (AddVB src1 src2));
format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
ins_encode %{
- bool vector256 = true;
- __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3196,8 +5041,30 @@
match(Set dst (AddVB src (LoadVector mem)));
format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
ins_encode %{
- bool vector256 = true;
- __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 1;
+ __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
+ match(Set dst (AddVB src1 src2));
+ format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
+ match(Set dst (AddVB src (LoadVector mem)));
+ format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3218,8 +5085,8 @@
match(Set dst (AddVS src1 src2));
format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
ins_encode %{
- bool vector256 = false;
- __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3239,8 +5106,8 @@
match(Set dst (AddVS src1 src2));
format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
ins_encode %{
- bool vector256 = false;
- __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3260,8 +5127,8 @@
match(Set dst (AddVS src1 src2));
format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
ins_encode %{
- bool vector256 = false;
- __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3271,8 +5138,8 @@
match(Set dst (AddVS src (LoadVector mem)));
format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
ins_encode %{
- bool vector256 = false;
- __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 0;
+ __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3282,8 +5149,8 @@
match(Set dst (AddVS src1 src2));
format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
ins_encode %{
- bool vector256 = true;
- __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3293,8 +5160,30 @@
match(Set dst (AddVS src (LoadVector mem)));
format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
ins_encode %{
- bool vector256 = true;
- __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 1;
+ __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+ match(Set dst (AddVS src1 src2));
+ format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+ match(Set dst (AddVS src (LoadVector mem)));
+ format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3315,8 +5204,8 @@
match(Set dst (AddVI src1 src2));
format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
ins_encode %{
- bool vector256 = false;
- __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3336,8 +5225,8 @@
match(Set dst (AddVI src1 src2));
format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
ins_encode %{
- bool vector256 = false;
- __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3347,8 +5236,8 @@
match(Set dst (AddVI src (LoadVector mem)));
format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
ins_encode %{
- bool vector256 = false;
- __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 0;
+ __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3358,8 +5247,8 @@
match(Set dst (AddVI src1 src2));
format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
ins_encode %{
- bool vector256 = true;
- __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3369,8 +5258,30 @@
match(Set dst (AddVI src (LoadVector mem)));
format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
ins_encode %{
- bool vector256 = true;
- __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 1;
+ __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
+ match(Set dst (AddVI src1 src2));
+ format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
+ match(Set dst (AddVI src (LoadVector mem)));
+ format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3391,8 +5302,8 @@
match(Set dst (AddVL src1 src2));
format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
ins_encode %{
- bool vector256 = false;
- __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3402,8 +5313,8 @@
match(Set dst (AddVL src (LoadVector mem)));
format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
ins_encode %{
- bool vector256 = false;
- __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 0;
+ __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3413,8 +5324,8 @@
match(Set dst (AddVL src1 src2));
format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
ins_encode %{
- bool vector256 = true;
- __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3424,8 +5335,30 @@
match(Set dst (AddVL src (LoadVector mem)));
format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
ins_encode %{
- bool vector256 = true;
- __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 1;
+ __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
+ match(Set dst (AddVL src1 src2));
+ format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
+ match(Set dst (AddVL src (LoadVector mem)));
+ format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3446,8 +5379,8 @@
match(Set dst (AddVF src1 src2));
format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
ins_encode %{
- bool vector256 = false;
- __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3467,8 +5400,8 @@
match(Set dst (AddVF src1 src2));
format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
ins_encode %{
- bool vector256 = false;
- __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3478,8 +5411,8 @@
match(Set dst (AddVF src (LoadVector mem)));
format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
ins_encode %{
- bool vector256 = false;
- __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 0;
+ __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3489,8 +5422,8 @@
match(Set dst (AddVF src1 src2));
format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
ins_encode %{
- bool vector256 = true;
- __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3500,8 +5433,30 @@
match(Set dst (AddVF src (LoadVector mem)));
format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
ins_encode %{
- bool vector256 = true;
- __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 1;
+ __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
+ match(Set dst (AddVF src1 src2));
+ format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
+ match(Set dst (AddVF src (LoadVector mem)));
+ format %{ "vaddps $dst,$src,$mem\t! add packed16F" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3522,8 +5477,8 @@
match(Set dst (AddVD src1 src2));
format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
ins_encode %{
- bool vector256 = false;
- __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3533,8 +5488,8 @@
match(Set dst (AddVD src (LoadVector mem)));
format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
ins_encode %{
- bool vector256 = false;
- __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 0;
+ __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3544,8 +5499,8 @@
match(Set dst (AddVD src1 src2));
format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
ins_encode %{
- bool vector256 = true;
- __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3555,8 +5510,30 @@
match(Set dst (AddVD src (LoadVector mem)));
format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
ins_encode %{
- bool vector256 = true;
- __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 1;
+ __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
+ match(Set dst (AddVD src1 src2));
+ format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
+ match(Set dst (AddVD src (LoadVector mem)));
+ format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3579,8 +5556,8 @@
match(Set dst (SubVB src1 src2));
format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
ins_encode %{
- bool vector256 = false;
- __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3600,8 +5577,8 @@
match(Set dst (SubVB src1 src2));
format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
ins_encode %{
- bool vector256 = false;
- __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3621,8 +5598,8 @@
match(Set dst (SubVB src1 src2));
format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
ins_encode %{
- bool vector256 = false;
- __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3632,8 +5609,8 @@
match(Set dst (SubVB src (LoadVector mem)));
format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
ins_encode %{
- bool vector256 = false;
- __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 0;
+ __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3643,8 +5620,8 @@
match(Set dst (SubVB src1 src2));
format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
ins_encode %{
- bool vector256 = true;
- __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3654,8 +5631,30 @@
match(Set dst (SubVB src (LoadVector mem)));
format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
ins_encode %{
- bool vector256 = true;
- __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 1;
+ __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
+ match(Set dst (SubVB src1 src2));
+ format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
+ match(Set dst (SubVB src (LoadVector mem)));
+ format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3676,8 +5675,8 @@
match(Set dst (SubVS src1 src2));
format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
ins_encode %{
- bool vector256 = false;
- __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3697,8 +5696,8 @@
match(Set dst (SubVS src1 src2));
format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
ins_encode %{
- bool vector256 = false;
- __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3718,8 +5717,8 @@
match(Set dst (SubVS src1 src2));
format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
ins_encode %{
- bool vector256 = false;
- __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3729,8 +5728,8 @@
match(Set dst (SubVS src (LoadVector mem)));
format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
ins_encode %{
- bool vector256 = false;
- __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 0;
+ __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3740,8 +5739,8 @@
match(Set dst (SubVS src1 src2));
format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
ins_encode %{
- bool vector256 = true;
- __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3751,8 +5750,30 @@
match(Set dst (SubVS src (LoadVector mem)));
format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
ins_encode %{
- bool vector256 = true;
- __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 1;
+ __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+ match(Set dst (SubVS src1 src2));
+ format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+ match(Set dst (SubVS src (LoadVector mem)));
+ format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3773,8 +5794,8 @@
match(Set dst (SubVI src1 src2));
format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
ins_encode %{
- bool vector256 = false;
- __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3794,8 +5815,8 @@
match(Set dst (SubVI src1 src2));
format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
ins_encode %{
- bool vector256 = false;
- __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3805,8 +5826,8 @@
match(Set dst (SubVI src (LoadVector mem)));
format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %}
ins_encode %{
- bool vector256 = false;
- __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 0;
+ __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3816,8 +5837,8 @@
match(Set dst (SubVI src1 src2));
format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
ins_encode %{
- bool vector256 = true;
- __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3827,8 +5848,30 @@
match(Set dst (SubVI src (LoadVector mem)));
format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
ins_encode %{
- bool vector256 = true;
- __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 1;
+ __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
+ match(Set dst (SubVI src1 src2));
+ format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
+ match(Set dst (SubVI src (LoadVector mem)));
+ format %{ "vpsubd $dst,$src,$mem\t! sub packed16I" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3849,8 +5892,8 @@
match(Set dst (SubVL src1 src2));
format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
ins_encode %{
- bool vector256 = false;
- __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3860,8 +5903,8 @@
match(Set dst (SubVL src (LoadVector mem)));
format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
ins_encode %{
- bool vector256 = false;
- __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 0;
+ __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3871,8 +5914,8 @@
match(Set dst (SubVL src1 src2));
format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
ins_encode %{
- bool vector256 = true;
- __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3882,8 +5925,30 @@
match(Set dst (SubVL src (LoadVector mem)));
format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
ins_encode %{
- bool vector256 = true;
- __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 1;
+ __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
+ match(Set dst (SubVL src1 src2));
+ format %{ "vpsubq $dst,$src1,$src2\t! sub packed8L" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
+ match(Set dst (SubVL src (LoadVector mem)));
+ format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3904,8 +5969,8 @@
match(Set dst (SubVF src1 src2));
format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
ins_encode %{
- bool vector256 = false;
- __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3925,8 +5990,8 @@
match(Set dst (SubVF src1 src2));
format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
ins_encode %{
- bool vector256 = false;
- __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3936,8 +6001,8 @@
match(Set dst (SubVF src (LoadVector mem)));
format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %}
ins_encode %{
- bool vector256 = false;
- __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 0;
+ __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3947,8 +6012,8 @@
match(Set dst (SubVF src1 src2));
format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %}
ins_encode %{
- bool vector256 = true;
- __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3958,8 +6023,30 @@
match(Set dst (SubVF src (LoadVector mem)));
format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %}
ins_encode %{
- bool vector256 = true;
- __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 1;
+ __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
+ match(Set dst (SubVF src1 src2));
+ format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
+ match(Set dst (SubVF src (LoadVector mem)));
+ format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3980,8 +6067,8 @@
match(Set dst (SubVD src1 src2));
format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
ins_encode %{
- bool vector256 = false;
- __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -3991,8 +6078,8 @@
match(Set dst (SubVD src (LoadVector mem)));
format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
ins_encode %{
- bool vector256 = false;
- __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 0;
+ __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4002,8 +6089,8 @@
match(Set dst (SubVD src1 src2));
format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
ins_encode %{
- bool vector256 = true;
- __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4013,8 +6100,30 @@
match(Set dst (SubVD src (LoadVector mem)));
format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %}
ins_encode %{
- bool vector256 = true;
- __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 1;
+ __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
+ match(Set dst (SubVD src1 src2));
+ format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
+ match(Set dst (SubVD src (LoadVector mem)));
+ format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4037,8 +6146,8 @@
match(Set dst (MulVS src1 src2));
format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
ins_encode %{
- bool vector256 = false;
- __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4058,8 +6167,8 @@
match(Set dst (MulVS src1 src2));
format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
ins_encode %{
- bool vector256 = false;
- __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4079,8 +6188,8 @@
match(Set dst (MulVS src1 src2));
format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
ins_encode %{
- bool vector256 = false;
- __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4090,8 +6199,8 @@
match(Set dst (MulVS src (LoadVector mem)));
format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
ins_encode %{
- bool vector256 = false;
- __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 0;
+ __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4101,8 +6210,8 @@
match(Set dst (MulVS src1 src2));
format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
ins_encode %{
- bool vector256 = true;
- __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4112,8 +6221,30 @@
match(Set dst (MulVS src (LoadVector mem)));
format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
ins_encode %{
- bool vector256 = true;
- __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 1;
+ __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+ match(Set dst (MulVS src1 src2));
+ format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+ match(Set dst (MulVS src (LoadVector mem)));
+ format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4134,8 +6265,19 @@
match(Set dst (MulVI src1 src2));
format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
ins_encode %{
- bool vector256 = false;
- __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
+ match(Set dst (MulVL src1 src2));
+ format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %}
+ ins_encode %{
+ int vector_len = 0;
+ __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4155,8 +6297,8 @@
match(Set dst (MulVI src1 src2));
format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
ins_encode %{
- bool vector256 = false;
- __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4166,8 +6308,30 @@
match(Set dst (MulVI src (LoadVector mem)));
format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
ins_encode %{
- bool vector256 = false;
- __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 0;
+ __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
+ match(Set dst (MulVL src1 src2));
+ format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %}
+ ins_encode %{
+ int vector_len = 1;
+ __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
+ match(Set dst (MulVL src (LoadVector mem)));
+ format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %}
+ ins_encode %{
+ int vector_len = 1;
+ __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4177,8 +6341,30 @@
match(Set dst (MulVI src1 src2));
format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
ins_encode %{
- bool vector256 = true;
- __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
+ match(Set dst (MulVL src1 src2));
+ format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
+ match(Set dst (MulVI src1 src2));
+ format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4188,8 +6374,30 @@
match(Set dst (MulVI src (LoadVector mem)));
format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
ins_encode %{
- bool vector256 = true;
- __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 1;
+ __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
+ match(Set dst (MulVL src (LoadVector mem)));
+ format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
+ match(Set dst (MulVI src (LoadVector mem)));
+ format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4210,8 +6418,8 @@
match(Set dst (MulVF src1 src2));
format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %}
ins_encode %{
- bool vector256 = false;
- __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4231,8 +6439,8 @@
match(Set dst (MulVF src1 src2));
format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %}
ins_encode %{
- bool vector256 = false;
- __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4242,8 +6450,8 @@
match(Set dst (MulVF src (LoadVector mem)));
format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %}
ins_encode %{
- bool vector256 = false;
- __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 0;
+ __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4253,8 +6461,8 @@
match(Set dst (MulVF src1 src2));
format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %}
ins_encode %{
- bool vector256 = true;
- __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4264,8 +6472,30 @@
match(Set dst (MulVF src (LoadVector mem)));
format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %}
ins_encode %{
- bool vector256 = true;
- __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 1;
+ __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
+ match(Set dst (MulVF src1 src2));
+ format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
+ match(Set dst (MulVF src (LoadVector mem)));
+ format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4286,8 +6516,8 @@
match(Set dst (MulVD src1 src2));
format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %}
ins_encode %{
- bool vector256 = false;
- __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4297,8 +6527,8 @@
match(Set dst (MulVD src (LoadVector mem)));
format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %}
ins_encode %{
- bool vector256 = false;
- __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 0;
+ __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4308,8 +6538,8 @@
match(Set dst (MulVD src1 src2));
format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
ins_encode %{
- bool vector256 = true;
- __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4319,8 +6549,30 @@
match(Set dst (MulVD src (LoadVector mem)));
format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
ins_encode %{
- bool vector256 = true;
- __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 1;
+ __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
+ match(Set dst (MulVD src1 src2));
+ format %{ "vmulpd $dst k0,$src1,$src2\t! mul packed8D" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
+ match(Set dst (MulVD src (LoadVector mem)));
+ format %{ "vmulpd $dst k0,$src,$mem\t! mul packed8D" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4343,8 +6595,8 @@
match(Set dst (DivVF src1 src2));
format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
ins_encode %{
- bool vector256 = false;
- __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4364,8 +6616,8 @@
match(Set dst (DivVF src1 src2));
format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
ins_encode %{
- bool vector256 = false;
- __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4375,8 +6627,8 @@
match(Set dst (DivVF src (LoadVector mem)));
format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
ins_encode %{
- bool vector256 = false;
- __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 0;
+ __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4386,8 +6638,8 @@
match(Set dst (DivVF src1 src2));
format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %}
ins_encode %{
- bool vector256 = true;
- __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4397,8 +6649,30 @@
match(Set dst (DivVF src (LoadVector mem)));
format %{ "vdivps $dst,$src,$mem\t! div packed8F" %}
ins_encode %{
- bool vector256 = true;
- __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 1;
+ __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
+ match(Set dst (DivVF src1 src2));
+ format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
+ match(Set dst (DivVF src (LoadVector mem)));
+ format %{ "vdivps $dst,$src,$mem\t! div packed16F" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4419,8 +6693,8 @@
match(Set dst (DivVD src1 src2));
format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %}
ins_encode %{
- bool vector256 = false;
- __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4430,8 +6704,8 @@
match(Set dst (DivVD src (LoadVector mem)));
format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %}
ins_encode %{
- bool vector256 = false;
- __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 0;
+ __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4441,8 +6715,8 @@
match(Set dst (DivVD src1 src2));
format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %}
ins_encode %{
- bool vector256 = true;
- __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4452,8 +6726,30 @@
match(Set dst (DivVD src (LoadVector mem)));
format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %}
ins_encode %{
- bool vector256 = true;
- __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 1;
+ __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
+ match(Set dst (DivVD src1 src2));
+ format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
+ match(Set dst (DivVD src (LoadVector mem)));
+ format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4500,8 +6796,8 @@
match(Set dst (LShiftVS src shift));
format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
ins_encode %{
- bool vector256 = false;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4511,8 +6807,8 @@
match(Set dst (LShiftVS src shift));
format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
ins_encode %{
- bool vector256 = false;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ int vector_len = 0;
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4542,8 +6838,8 @@
match(Set dst (LShiftVS src shift));
format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
ins_encode %{
- bool vector256 = false;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4553,8 +6849,8 @@
match(Set dst (LShiftVS src shift));
format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
ins_encode %{
- bool vector256 = false;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ int vector_len = 0;
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4584,8 +6880,8 @@
match(Set dst (LShiftVS src shift));
format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
ins_encode %{
- bool vector256 = false;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4595,8 +6891,8 @@
match(Set dst (LShiftVS src shift));
format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
ins_encode %{
- bool vector256 = false;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ int vector_len = 0;
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4606,8 +6902,8 @@
match(Set dst (LShiftVS src shift));
format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
ins_encode %{
- bool vector256 = true;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4617,8 +6913,30 @@
match(Set dst (LShiftVS src shift));
format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
ins_encode %{
- bool vector256 = true;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ int vector_len = 1;
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+ match(Set dst (LShiftVS src shift));
+ format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+ match(Set dst (LShiftVS src shift));
+ format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4649,8 +6967,8 @@
match(Set dst (LShiftVI src shift));
format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
ins_encode %{
- bool vector256 = false;
- __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4660,8 +6978,8 @@
match(Set dst (LShiftVI src shift));
format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
ins_encode %{
- bool vector256 = false;
- __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ int vector_len = 0;
+ __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4691,8 +7009,8 @@
match(Set dst (LShiftVI src shift));
format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
ins_encode %{
- bool vector256 = false;
- __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4702,8 +7020,8 @@
match(Set dst (LShiftVI src shift));
format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
ins_encode %{
- bool vector256 = false;
- __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ int vector_len = 0;
+ __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4713,8 +7031,8 @@
match(Set dst (LShiftVI src shift));
format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
ins_encode %{
- bool vector256 = true;
- __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4724,8 +7042,30 @@
match(Set dst (LShiftVI src shift));
format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
ins_encode %{
- bool vector256 = true;
- __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ int vector_len = 1;
+ __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
+ match(Set dst (LShiftVI src shift));
+ format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
+ match(Set dst (LShiftVI src shift));
+ format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4756,8 +7096,8 @@
match(Set dst (LShiftVL src shift));
format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
ins_encode %{
- bool vector256 = false;
- __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4767,8 +7107,8 @@
match(Set dst (LShiftVL src shift));
format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
ins_encode %{
- bool vector256 = false;
- __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ int vector_len = 0;
+ __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4778,8 +7118,8 @@
match(Set dst (LShiftVL src shift));
format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
ins_encode %{
- bool vector256 = true;
- __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4789,8 +7129,30 @@
match(Set dst (LShiftVL src shift));
format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
ins_encode %{
- bool vector256 = true;
- __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ int vector_len = 1;
+ __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
+ match(Set dst (LShiftVL src shift));
+ format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
+ match(Set dst (LShiftVL src shift));
+ format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4827,8 +7189,8 @@
match(Set dst (URShiftVS src shift));
format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
ins_encode %{
- bool vector256 = false;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4838,8 +7200,8 @@
match(Set dst (URShiftVS src shift));
format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
ins_encode %{
- bool vector256 = false;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ int vector_len = 0;
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4869,8 +7231,8 @@
match(Set dst (URShiftVS src shift));
format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
ins_encode %{
- bool vector256 = false;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4880,8 +7242,8 @@
match(Set dst (URShiftVS src shift));
format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
ins_encode %{
- bool vector256 = false;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ int vector_len = 0;
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4911,8 +7273,8 @@
match(Set dst (URShiftVS src shift));
format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
ins_encode %{
- bool vector256 = false;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4922,8 +7284,8 @@
match(Set dst (URShiftVS src shift));
format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
ins_encode %{
- bool vector256 = false;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ int vector_len = 0;
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4933,8 +7295,8 @@
match(Set dst (URShiftVS src shift));
format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
ins_encode %{
- bool vector256 = true;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4944,8 +7306,30 @@
match(Set dst (URShiftVS src shift));
format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
ins_encode %{
- bool vector256 = true;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ int vector_len = 1;
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+ match(Set dst (URShiftVS src shift));
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+ match(Set dst (URShiftVS src shift));
+ format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4976,8 +7360,8 @@
match(Set dst (URShiftVI src shift));
format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
ins_encode %{
- bool vector256 = false;
- __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4987,8 +7371,8 @@
match(Set dst (URShiftVI src shift));
format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
ins_encode %{
- bool vector256 = false;
- __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ int vector_len = 0;
+ __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5018,8 +7402,8 @@
match(Set dst (URShiftVI src shift));
format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
ins_encode %{
- bool vector256 = false;
- __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5029,8 +7413,8 @@
match(Set dst (URShiftVI src shift));
format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
ins_encode %{
- bool vector256 = false;
- __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ int vector_len = 0;
+ __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5040,8 +7424,8 @@
match(Set dst (URShiftVI src shift));
format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
ins_encode %{
- bool vector256 = true;
- __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5051,8 +7435,30 @@
match(Set dst (URShiftVI src shift));
format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
ins_encode %{
- bool vector256 = true;
- __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ int vector_len = 1;
+ __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
+ match(Set dst (URShiftVI src shift));
+ format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
+ match(Set dst (URShiftVI src shift));
+ format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5083,8 +7489,8 @@
match(Set dst (URShiftVL src shift));
format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
ins_encode %{
- bool vector256 = false;
- __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5094,8 +7500,8 @@
match(Set dst (URShiftVL src shift));
format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
ins_encode %{
- bool vector256 = false;
- __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ int vector_len = 0;
+ __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5105,8 +7511,8 @@
match(Set dst (URShiftVL src shift));
format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
ins_encode %{
- bool vector256 = true;
- __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5116,8 +7522,30 @@
match(Set dst (URShiftVL src shift));
format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
ins_encode %{
- bool vector256 = true;
- __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ int vector_len = 1;
+ __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
+ match(Set dst (URShiftVL src shift));
+ format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
+ match(Set dst (URShiftVL src shift));
+ format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5150,8 +7578,8 @@
match(Set dst (RShiftVS src shift));
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
ins_encode %{
- bool vector256 = false;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5161,8 +7589,8 @@
match(Set dst (RShiftVS src shift));
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
ins_encode %{
- bool vector256 = false;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ int vector_len = 0;
+ __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5192,8 +7620,8 @@
match(Set dst (RShiftVS src shift));
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
ins_encode %{
- bool vector256 = false;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5203,8 +7631,8 @@
match(Set dst (RShiftVS src shift));
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
ins_encode %{
- bool vector256 = false;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ int vector_len = 0;
+ __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5234,8 +7662,8 @@
match(Set dst (RShiftVS src shift));
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
ins_encode %{
- bool vector256 = false;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5245,8 +7673,8 @@
match(Set dst (RShiftVS src shift));
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
ins_encode %{
- bool vector256 = false;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ int vector_len = 0;
+ __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5256,8 +7684,8 @@
match(Set dst (RShiftVS src shift));
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
ins_encode %{
- bool vector256 = true;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5267,8 +7695,30 @@
match(Set dst (RShiftVS src shift));
format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
ins_encode %{
- bool vector256 = true;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ int vector_len = 1;
+ __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+ match(Set dst (RShiftVS src shift));
+ format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+ match(Set dst (RShiftVS src shift));
+ format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5299,8 +7749,8 @@
match(Set dst (RShiftVI src shift));
format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
ins_encode %{
- bool vector256 = false;
- __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5310,8 +7760,8 @@
match(Set dst (RShiftVI src shift));
format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
ins_encode %{
- bool vector256 = false;
- __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ int vector_len = 0;
+ __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5341,8 +7791,8 @@
match(Set dst (RShiftVI src shift));
format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
ins_encode %{
- bool vector256 = false;
- __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5352,8 +7802,8 @@
match(Set dst (RShiftVI src shift));
format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
ins_encode %{
- bool vector256 = false;
- __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ int vector_len = 0;
+ __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5363,8 +7813,8 @@
match(Set dst (RShiftVI src shift));
format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
ins_encode %{
- bool vector256 = true;
- __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5374,8 +7824,30 @@
match(Set dst (RShiftVI src shift));
format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
ins_encode %{
- bool vector256 = true;
- __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+ int vector_len = 1;
+ __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
+ match(Set dst (RShiftVI src shift));
+ format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
+ match(Set dst (RShiftVI src shift));
+ format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5400,8 +7872,8 @@
match(Set dst (AndV src1 src2));
format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
ins_encode %{
- bool vector256 = false;
- __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5421,8 +7893,8 @@
match(Set dst (AndV src1 src2));
format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
ins_encode %{
- bool vector256 = false;
- __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5442,8 +7914,8 @@
match(Set dst (AndV src1 src2));
format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
ins_encode %{
- bool vector256 = false;
- __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5453,8 +7925,8 @@
match(Set dst (AndV src (LoadVector mem)));
format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
ins_encode %{
- bool vector256 = false;
- __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 0;
+ __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5464,8 +7936,8 @@
match(Set dst (AndV src1 src2));
format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
ins_encode %{
- bool vector256 = true;
- __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5475,8 +7947,30 @@
match(Set dst (AndV src (LoadVector mem)));
format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
ins_encode %{
- bool vector256 = true;
- __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 1;
+ __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
+ match(Set dst (AndV src1 src2));
+ format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
+ match(Set dst (AndV src (LoadVector mem)));
+ format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5498,8 +7992,8 @@
match(Set dst (OrV src1 src2));
format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
ins_encode %{
- bool vector256 = false;
- __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5519,8 +8013,8 @@
match(Set dst (OrV src1 src2));
format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
ins_encode %{
- bool vector256 = false;
- __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5540,8 +8034,8 @@
match(Set dst (OrV src1 src2));
format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
ins_encode %{
- bool vector256 = false;
- __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5551,8 +8045,8 @@
match(Set dst (OrV src (LoadVector mem)));
format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
ins_encode %{
- bool vector256 = false;
- __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 0;
+ __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5562,8 +8056,8 @@
match(Set dst (OrV src1 src2));
format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
ins_encode %{
- bool vector256 = true;
- __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5573,8 +8067,30 @@
match(Set dst (OrV src (LoadVector mem)));
format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
ins_encode %{
- bool vector256 = true;
- __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 1;
+ __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
+ match(Set dst (OrV src1 src2));
+ format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
+ match(Set dst (OrV src (LoadVector mem)));
+ format %{ "vpor $dst,$src,$mem\t! or vectors (64 bytes)" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5596,8 +8112,8 @@
match(Set dst (XorV src1 src2));
format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
ins_encode %{
- bool vector256 = false;
- __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5617,8 +8133,8 @@
match(Set dst (XorV src1 src2));
format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
ins_encode %{
- bool vector256 = false;
- __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5638,8 +8154,8 @@
match(Set dst (XorV src1 src2));
format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
ins_encode %{
- bool vector256 = false;
- __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 0;
+ __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5649,8 +8165,8 @@
match(Set dst (XorV src (LoadVector mem)));
format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
ins_encode %{
- bool vector256 = false;
- __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+ int vector_len = 0;
+ __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5660,8 +8176,8 @@
match(Set dst (XorV src1 src2));
format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
ins_encode %{
- bool vector256 = true;
- __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+ int vector_len = 1;
+ __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5671,9 +8187,31 @@
match(Set dst (XorV src (LoadVector mem)));
format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
ins_encode %{
- bool vector256 = true;
- __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
- %}
- ins_pipe( pipe_slow );
-%}
-
+ int vector_len = 1;
+ __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
+ match(Set dst (XorV src1 src2));
+ format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
+ match(Set dst (XorV src (LoadVector mem)));
+ format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+