--- a/src/hotspot/cpu/x86/assembler_x86.cpp Tue May 07 21:53:46 2019 +0200
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp Tue May 07 13:33:27 2019 -0700
@@ -1894,6 +1894,69 @@
emit_int8((unsigned char)(0xC0 | encode));
}
+void Assembler::pabsb(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_ssse3(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int8(0x1C);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::pabsw(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_ssse3(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int8(0x1D);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::pabsd(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_ssse3(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int8(0x1E);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::vpabsb(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
+ vector_len == AVX_256bit? VM_Version::supports_avx2() :
+ vector_len == AVX_512bit? VM_Version::supports_avx512bw() : 0, "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int8((unsigned char)0x1C);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::vpabsw(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
+ vector_len == AVX_256bit? VM_Version::supports_avx2() :
+ vector_len == AVX_512bit? VM_Version::supports_avx512bw() : 0, "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int8((unsigned char)0x1D);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::vpabsd(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
+ vector_len == AVX_256bit? VM_Version::supports_avx2() :
+ vector_len == AVX_512bit? VM_Version::supports_evex() : 0, "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int8((unsigned char)0x1E);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::evpabsq(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(UseAVX > 2, "");
+ InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int8((unsigned char)0x1F);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
void Assembler::decl(Address dst) {
// Don't use it directly. Use MacroAssembler::decrement() instead.
InstructionMark im(this);
@@ -3416,10 +3479,19 @@
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x00);
- emit_int8(0xC0 | encode);
+ emit_int8((unsigned char)(0xC0 | encode));
emit_int8(imm8);
}
+void Assembler::vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(UseAVX > 2, "requires AVX512F");
+ InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int8((unsigned char)0x36);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
void Assembler::vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) {
assert(VM_Version::supports_avx2(), "");
InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
@@ -3884,6 +3956,14 @@
emit_int8((unsigned char)(0xC0 | encode));
}
+void Assembler::pmovsxbw(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse4_1(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int8(0x20);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
void Assembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
assert(VM_Version::supports_avx(), "");
InstructionMark im(this);
@@ -3905,6 +3985,15 @@
emit_int8((unsigned char) (0xC0 | encode));
}
+void Assembler::vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
+ vector_len == AVX_256bit? VM_Version::supports_avx2() :
+ vector_len == AVX_512bit? VM_Version::supports_avx512bw() : 0, "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int8(0x20);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
void Assembler::evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len) {
assert(VM_Version::supports_avx512vlbw(), "");
@@ -6277,6 +6366,26 @@
emit_int8((unsigned char)(0xC0 | encode));
}
+void Assembler::evpsraq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
+ assert(UseAVX > 2, "requires AVX512");
+ assert((VM_Version::supports_avx512vl() || vector_len == AVX_512bit), "requires AVX512VL");
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int8((unsigned char)0x72);
+ emit_int8((unsigned char)(0xC0 | encode));
+ emit_int8(shift & 0xFF);
+}
+
+void Assembler::evpsraq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+ assert(UseAVX > 2, "requires AVX512");
+ assert((VM_Version::supports_avx512vl() || vector_len == AVX_512bit), "requires AVX512VL");
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int8((unsigned char)0xE2);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
// logical operations packed integers
void Assembler::pand(XMMRegister dst, XMMRegister src) {
--- a/src/hotspot/cpu/x86/assembler_x86.hpp Tue May 07 21:53:46 2019 +0200
+++ b/src/hotspot/cpu/x86/assembler_x86.hpp Tue May 07 13:33:27 2019 -0700
@@ -1102,6 +1102,15 @@
void cvttpd2dq(XMMRegister dst, XMMRegister src);
+ // Absolute value of packed integer values
+ void pabsb(XMMRegister dst, XMMRegister src);
+ void pabsw(XMMRegister dst, XMMRegister src);
+ void pabsd(XMMRegister dst, XMMRegister src);
+ void vpabsb(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpabsw(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpabsd(XMMRegister dst, XMMRegister src, int vector_len);
+ void evpabsq(XMMRegister dst, XMMRegister src, int vector_len);
+
// Divide Scalar Double-Precision Floating-Point Values
void divsd(XMMRegister dst, Address src);
void divsd(XMMRegister dst, XMMRegister src);
@@ -1589,6 +1598,7 @@
// Pemutation of 64bit words
void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
void vpermq(XMMRegister dst, XMMRegister src, int imm8);
+ void vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
@@ -1668,6 +1678,10 @@
void evpmovdb(Address dst, XMMRegister src, int vector_len);
+ // Sign extend moves
+ void pmovsxbw(XMMRegister dst, XMMRegister src);
+ void vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len);
+
// Multiply add
void pmaddwd(XMMRegister dst, XMMRegister src);
void vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
@@ -2094,6 +2108,8 @@
void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+ void evpsraq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
+ void evpsraq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
// And packed integers
void pand(XMMRegister dst, XMMRegister src);
--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp Tue May 07 21:53:46 2019 +0200
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp Tue May 07 13:33:27 2019 -0700
@@ -1003,25 +1003,25 @@
}
}
-void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
+void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
// Used in sign-masking with aligned address.
assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
if (reachable(src)) {
Assembler::andpd(dst, as_Address(src));
} else {
- lea(rscratch1, src);
- Assembler::andpd(dst, Address(rscratch1, 0));
- }
-}
-
-void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) {
+ lea(scratch_reg, src);
+ Assembler::andpd(dst, Address(scratch_reg, 0));
+ }
+}
+
+void MacroAssembler::andps(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
// Used in sign-masking with aligned address.
assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
if (reachable(src)) {
Assembler::andps(dst, as_Address(src));
} else {
- lea(rscratch1, src);
- Assembler::andps(dst, Address(rscratch1, 0));
+ lea(scratch_reg, src);
+ Assembler::andps(dst, Address(scratch_reg, 0));
}
}
@@ -3340,13 +3340,13 @@
Assembler::vmovdqu(dst, src);
}
-void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src) {
+void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
if (reachable(src)) {
vmovdqu(dst, as_Address(src));
}
else {
- lea(rscratch1, src);
- vmovdqu(dst, Address(rscratch1, 0));
+ lea(scratch_reg, src);
+ vmovdqu(dst, Address(scratch_reg, 0));
}
}
@@ -3698,14 +3698,14 @@
}
}
-void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
+void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
// Used in sign-bit flipping with aligned address.
assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
if (reachable(src)) {
Assembler::xorpd(dst, as_Address(src));
} else {
- lea(rscratch1, src);
- Assembler::xorpd(dst, Address(rscratch1, 0));
+ lea(scratch_reg, src);
+ Assembler::xorpd(dst, Address(scratch_reg, 0));
}
}
@@ -3726,14 +3726,14 @@
}
}
-void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
+void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
// Used in sign-bit flipping with aligned address.
assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
if (reachable(src)) {
Assembler::xorps(dst, as_Address(src));
} else {
- lea(rscratch1, src);
- Assembler::xorps(dst, Address(rscratch1, 0));
+ lea(scratch_reg, src);
+ Assembler::xorps(dst, Address(scratch_reg, 0));
}
}
@@ -3799,12 +3799,12 @@
Assembler::vpaddw(dst, nds, src, vector_len);
}
-void MacroAssembler::vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
+void MacroAssembler::vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
if (reachable(src)) {
Assembler::vpand(dst, nds, as_Address(src), vector_len);
} else {
- lea(rscratch1, src);
- Assembler::vpand(dst, nds, Address(rscratch1, 0), vector_len);
+ lea(scratch_reg, src);
+ Assembler::vpand(dst, nds, Address(scratch_reg, 0), vector_len);
}
}
@@ -3873,6 +3873,22 @@
Assembler::vpsraw(dst, nds, shift, vector_len);
}
+void MacroAssembler::evpsraq(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
+ assert(UseAVX > 2,"");
+ if (!VM_Version::supports_avx512vl() && vector_len < 2) {
+ vector_len = 2;
+ }
+ Assembler::evpsraq(dst, nds, shift, vector_len);
+}
+
+void MacroAssembler::evpsraq(XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
+ assert(UseAVX > 2,"");
+ if (!VM_Version::supports_avx512vl() && vector_len < 2) {
+ vector_len = 2;
+ }
+ Assembler::evpsraq(dst, nds, shift, vector_len);
+}
+
void MacroAssembler::vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
assert(((dst->encoding() < 16 && shift->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
Assembler::vpsrlw(dst, nds, shift, vector_len);
@@ -3913,21 +3929,21 @@
Assembler::pshuflw(dst, src, mode);
}
-void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
+void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
if (reachable(src)) {
vandpd(dst, nds, as_Address(src), vector_len);
} else {
- lea(rscratch1, src);
- vandpd(dst, nds, Address(rscratch1, 0), vector_len);
- }
-}
-
-void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
+ lea(scratch_reg, src);
+ vandpd(dst, nds, Address(scratch_reg, 0), vector_len);
+ }
+}
+
+void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
if (reachable(src)) {
vandps(dst, nds, as_Address(src), vector_len);
} else {
- lea(rscratch1, src);
- vandps(dst, nds, Address(rscratch1, 0), vector_len);
+ lea(scratch_reg, src);
+ vandps(dst, nds, Address(scratch_reg, 0), vector_len);
}
}
@@ -3995,23 +4011,161 @@
vxorpd(dst, nds, src, Assembler::AVX_128bit);
}
-void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
+void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
if (reachable(src)) {
vxorpd(dst, nds, as_Address(src), vector_len);
} else {
- lea(rscratch1, src);
- vxorpd(dst, nds, Address(rscratch1, 0), vector_len);
- }
-}
-
-void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
+ lea(scratch_reg, src);
+ vxorpd(dst, nds, Address(scratch_reg, 0), vector_len);
+ }
+}
+
+void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
if (reachable(src)) {
vxorps(dst, nds, as_Address(src), vector_len);
} else {
- lea(rscratch1, src);
- vxorps(dst, nds, Address(rscratch1, 0), vector_len);
- }
-}
+ lea(scratch_reg, src);
+ vxorps(dst, nds, Address(scratch_reg, 0), vector_len);
+ }
+}
+
+void MacroAssembler::vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
+ if (UseAVX > 1 || (vector_len < 1)) {
+ if (reachable(src)) {
+ Assembler::vpxor(dst, nds, as_Address(src), vector_len);
+ } else {
+ lea(scratch_reg, src);
+ Assembler::vpxor(dst, nds, Address(scratch_reg, 0), vector_len);
+ }
+ }
+ else {
+ MacroAssembler::vxorpd(dst, nds, src, vector_len, scratch_reg);
+ }
+}
+
+//-------------------------------------------------------------------------------------------
+#ifdef COMPILER2
+// Generic instructions support for use in .ad files C2 code generation
+
+void MacroAssembler::vabsnegd(int opcode, XMMRegister dst, Register scr) {
+ if (opcode == Op_AbsVD) {
+ andpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), scr);
+ } else {
+ assert((opcode == Op_NegVD),"opcode should be Op_NegVD");
+ xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), scr);
+ }
+}
+
+void MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr) {
+ if (opcode == Op_AbsVD) {
+ vandpd(dst, src, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), vector_len, scr);
+ } else {
+ assert((opcode == Op_NegVD),"opcode should be Op_NegVD");
+ vxorpd(dst, src, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), vector_len, scr);
+ }
+}
+
+void MacroAssembler::vabsnegf(int opcode, XMMRegister dst, Register scr) {
+ if (opcode == Op_AbsVF) {
+ andps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_mask()), scr);
+ } else {
+ assert((opcode == Op_NegVF),"opcode should be Op_NegVF");
+ xorps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), scr);
+ }
+}
+
+void MacroAssembler::vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr) {
+ if (opcode == Op_AbsVF) {
+ vandps(dst, src, ExternalAddress(StubRoutines::x86::vector_float_sign_mask()), vector_len, scr);
+ } else {
+ assert((opcode == Op_NegVF),"opcode should be Op_NegVF");
+ vxorps(dst, src, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), vector_len, scr);
+ }
+}
+
+void MacroAssembler::vextendbw(bool sign, XMMRegister dst, XMMRegister src) {
+ if (sign) {
+ pmovsxbw(dst, src);
+ } else {
+ pmovzxbw(dst, src);
+ }
+}
+
+void MacroAssembler::vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len) {
+ if (sign) {
+ vpmovsxbw(dst, src, vector_len);
+ } else {
+ vpmovzxbw(dst, src, vector_len);
+ }
+}
+
+void MacroAssembler::vshiftd(int opcode, XMMRegister dst, XMMRegister src) {
+ if (opcode == Op_RShiftVI) {
+ psrad(dst, src);
+ } else if (opcode == Op_LShiftVI) {
+ pslld(dst, src);
+ } else {
+ assert((opcode == Op_URShiftVI),"opcode should be Op_URShiftVI");
+ psrld(dst, src);
+ }
+}
+
+void MacroAssembler::vshiftd(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ if (opcode == Op_RShiftVI) {
+ vpsrad(dst, nds, src, vector_len);
+ } else if (opcode == Op_LShiftVI) {
+ vpslld(dst, nds, src, vector_len);
+ } else {
+ assert((opcode == Op_URShiftVI),"opcode should be Op_URShiftVI");
+ vpsrld(dst, nds, src, vector_len);
+ }
+}
+
+void MacroAssembler::vshiftw(int opcode, XMMRegister dst, XMMRegister src) {
+ if ((opcode == Op_RShiftVS) || (opcode == Op_RShiftVB)) {
+ psraw(dst, src);
+ } else if ((opcode == Op_LShiftVS) || (opcode == Op_LShiftVB)) {
+ psllw(dst, src);
+ } else {
+ assert(((opcode == Op_URShiftVS) || (opcode == Op_URShiftVB)),"opcode should be one of Op_URShiftVS or Op_URShiftVB");
+ psrlw(dst, src);
+ }
+}
+
+void MacroAssembler::vshiftw(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ if ((opcode == Op_RShiftVS) || (opcode == Op_RShiftVB)) {
+ vpsraw(dst, nds, src, vector_len);
+ } else if ((opcode == Op_LShiftVS) || (opcode == Op_LShiftVB)) {
+ vpsllw(dst, nds, src, vector_len);
+ } else {
+ assert(((opcode == Op_URShiftVS) || (opcode == Op_URShiftVB)),"opcode should be one of Op_URShiftVS or Op_URShiftVB");
+ vpsrlw(dst, nds, src, vector_len);
+ }
+}
+
+void MacroAssembler::vshiftq(int opcode, XMMRegister dst, XMMRegister src) {
+ if (opcode == Op_RShiftVL) {
+ psrlq(dst, src); // using srl to implement sra on pre-avx512 systems
+ } else if (opcode == Op_LShiftVL) {
+ psllq(dst, src);
+ } else {
+ assert((opcode == Op_URShiftVL),"opcode should be Op_URShiftVL");
+ psrlq(dst, src);
+ }
+}
+
+void MacroAssembler::vshiftq(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ if (opcode == Op_RShiftVL) {
+ evpsraq(dst, nds, src, vector_len);
+ } else if (opcode == Op_LShiftVL) {
+ vpsllq(dst, nds, src, vector_len);
+ } else {
+ assert((opcode == Op_URShiftVL),"opcode should be Op_URShiftVL");
+ vpsrlq(dst, nds, src, vector_len);
+ }
+}
+#endif
+//-------------------------------------------------------------------------------------------
void MacroAssembler::clear_jweak_tag(Register possibly_jweak) {
const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask);
--- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp Tue May 07 21:53:46 2019 +0200
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp Tue May 07 13:33:27 2019 -0700
@@ -877,12 +877,12 @@
// Floating
void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); }
- void andpd(XMMRegister dst, AddressLiteral src);
+ void andpd(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
void andpd(XMMRegister dst, XMMRegister src) { Assembler::andpd(dst, src); }
void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); }
void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); }
- void andps(XMMRegister dst, AddressLiteral src);
+ void andps(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
void comiss(XMMRegister dst, XMMRegister src) { Assembler::comiss(dst, src); }
void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); }
@@ -1066,8 +1066,8 @@
// these are private because users should be doing movflt/movdbl
+ void movss(XMMRegister dst, XMMRegister src) { Assembler::movss(dst, src); }
void movss(Address dst, XMMRegister src) { Assembler::movss(dst, src); }
- void movss(XMMRegister dst, XMMRegister src) { Assembler::movss(dst, src); }
void movss(XMMRegister dst, Address src) { Assembler::movss(dst, src); }
void movss(XMMRegister dst, AddressLiteral src);
@@ -1105,7 +1105,7 @@
void vmovdqu(Address dst, XMMRegister src);
void vmovdqu(XMMRegister dst, Address src);
void vmovdqu(XMMRegister dst, XMMRegister src);
- void vmovdqu(XMMRegister dst, AddressLiteral src);
+ void vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
void evmovdquq(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
void evmovdquq(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
@@ -1183,12 +1183,12 @@
// Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
void xorpd(XMMRegister dst, XMMRegister src);
void xorpd(XMMRegister dst, Address src) { Assembler::xorpd(dst, src); }
- void xorpd(XMMRegister dst, AddressLiteral src);
+ void xorpd(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
// Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
void xorps(XMMRegister dst, XMMRegister src);
void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); }
- void xorps(XMMRegister dst, AddressLiteral src);
+ void xorps(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
// Shuffle Bytes
void pshufb(XMMRegister dst, XMMRegister src) { Assembler::pshufb(dst, src); }
@@ -1215,7 +1215,7 @@
void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); }
void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); }
- void vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len);
+ void vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
void vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
void vpbroadcastw(XMMRegister dst, Address src, int vector_len) { Assembler::vpbroadcastw(dst, src, vector_len); }
@@ -1241,6 +1241,9 @@
void vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
void vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
+ void evpsraq(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
+ void evpsraq(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
+
void vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
void vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
@@ -1260,11 +1263,11 @@
void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); }
void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); }
- void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len);
+ void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); }
void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); }
- void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len);
+ void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); }
void vdivsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivsd(dst, nds, src); }
@@ -1297,11 +1300,11 @@
void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vxorpd(dst, nds, src, vector_len); }
void vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vxorpd(dst, nds, src, vector_len); }
- void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len);
+ void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vxorps(dst, nds, src, vector_len); }
void vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vxorps(dst, nds, src, vector_len); }
- void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len);
+ void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
if (UseAVX > 1 || (vector_len < 1)) // vpxor 256 bit is available only in AVX2
@@ -1315,6 +1318,7 @@
else
Assembler::vxorpd(dst, nds, src, vector_len);
}
+ void vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
// Simple version for AVX2 256bit vectors
void vpxor(XMMRegister dst, XMMRegister src) { Assembler::vpxor(dst, dst, src, true); }
@@ -1601,6 +1605,22 @@
void movl2ptr(Register dst, Address src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(movl(dst, src)); }
void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); }
+#ifdef COMPILER2
+ // Generic instructions support for use in .ad files C2 code generation
+ void vabsnegd(int opcode, XMMRegister dst, Register scr);
+ void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr);
+ void vabsnegf(int opcode, XMMRegister dst, Register scr);
+ void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr);
+ void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
+ void vextendbw(bool sign, XMMRegister dst, XMMRegister src);
+ void vshiftd(int opcode, XMMRegister dst, XMMRegister src);
+ void vshiftd(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void vshiftw(int opcode, XMMRegister dst, XMMRegister src);
+ void vshiftw(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void vshiftq(int opcode, XMMRegister dst, XMMRegister src);
+ void vshiftq(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+#endif
+
// C2 compiled method's prolog code.
void verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub);
--- a/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp Tue May 07 21:53:46 2019 +0200
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp Tue May 07 13:33:27 2019 -0700
@@ -602,7 +602,59 @@
return start;
}
-
+ //---------------------------------------------------------------------------------------------------
+
+ address generate_vector_mask(const char *stub_name, int32_t mask) {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", stub_name);
+ address start = __ pc();
+
+ for (int i = 0; i < 16; i++) {
+ __ emit_data(mask, relocInfo::none, 0);
+ }
+
+ return start;
+ }
+
+ address generate_vector_mask_long_double(const char *stub_name, int32_t maskhi, int32_t masklo) {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", stub_name);
+ address start = __ pc();
+
+ for (int i = 0; i < 8; i++) {
+ __ emit_data(masklo, relocInfo::none, 0);
+ __ emit_data(maskhi, relocInfo::none, 0);
+ }
+
+ return start;
+ }
+
+ //----------------------------------------------------------------------------------------------------
+
+ address generate_vector_byte_perm_mask(const char *stub_name) {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", stub_name);
+ address start = __ pc();
+
+ __ emit_data(0x00000001, relocInfo::none, 0);
+ __ emit_data(0x00000000, relocInfo::none, 0);
+ __ emit_data(0x00000003, relocInfo::none, 0);
+ __ emit_data(0x00000000, relocInfo::none, 0);
+ __ emit_data(0x00000005, relocInfo::none, 0);
+ __ emit_data(0x00000000, relocInfo::none, 0);
+ __ emit_data(0x00000007, relocInfo::none, 0);
+ __ emit_data(0x00000000, relocInfo::none, 0);
+ __ emit_data(0x00000000, relocInfo::none, 0);
+ __ emit_data(0x00000000, relocInfo::none, 0);
+ __ emit_data(0x00000002, relocInfo::none, 0);
+ __ emit_data(0x00000000, relocInfo::none, 0);
+ __ emit_data(0x00000004, relocInfo::none, 0);
+ __ emit_data(0x00000000, relocInfo::none, 0);
+ __ emit_data(0x00000006, relocInfo::none, 0);
+ __ emit_data(0x00000000, relocInfo::none, 0);
+
+ return start;
+ }
//----------------------------------------------------------------------------------------------------
// Non-destructive plausibility checks for oops
@@ -3823,6 +3875,14 @@
//------------------------------------------------------------------------------------------------------------------------
// entry points that are platform specific
+ StubRoutines::x86::_vector_float_sign_mask = generate_vector_mask("vector_float_sign_mask", 0x7FFFFFFF);
+ StubRoutines::x86::_vector_float_sign_flip = generate_vector_mask("vector_float_sign_flip", 0x80000000);
+ StubRoutines::x86::_vector_double_sign_mask = generate_vector_mask_long_double("vector_double_sign_mask", 0x7FFFFFFF, 0xFFFFFFFF);
+ StubRoutines::x86::_vector_double_sign_flip = generate_vector_mask_long_double("vector_double_sign_flip", 0x80000000, 0x00000000);
+ StubRoutines::x86::_vector_short_to_byte_mask = generate_vector_mask("vector_short_to_byte_mask", 0x00ff00ff);
+ StubRoutines::x86::_vector_byte_perm_mask = generate_vector_byte_perm_mask("vector_byte_perm_mask");
+ StubRoutines::x86::_vector_long_sign_mask = generate_vector_mask_long_double("vector_long_sign_mask", 0x80000000, 0x00000000);
+
// support for verify_oop (must happen after universe_init)
StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp Tue May 07 21:53:46 2019 +0200
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp Tue May 07 13:33:27 2019 -0700
@@ -979,6 +979,40 @@
return start;
}
+ address generate_vector_mask(const char *stub_name, int64_t mask) {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", stub_name);
+ address start = __ pc();
+
+ __ emit_data64(mask, relocInfo::none);
+ __ emit_data64(mask, relocInfo::none);
+ __ emit_data64(mask, relocInfo::none);
+ __ emit_data64(mask, relocInfo::none);
+ __ emit_data64(mask, relocInfo::none);
+ __ emit_data64(mask, relocInfo::none);
+ __ emit_data64(mask, relocInfo::none);
+ __ emit_data64(mask, relocInfo::none);
+
+ return start;
+ }
+
+ address generate_vector_byte_perm_mask(const char *stub_name) {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", stub_name);
+ address start = __ pc();
+
+ __ emit_data64(0x0000000000000001, relocInfo::none);
+ __ emit_data64(0x0000000000000003, relocInfo::none);
+ __ emit_data64(0x0000000000000005, relocInfo::none);
+ __ emit_data64(0x0000000000000007, relocInfo::none);
+ __ emit_data64(0x0000000000000000, relocInfo::none);
+ __ emit_data64(0x0000000000000002, relocInfo::none);
+ __ emit_data64(0x0000000000000004, relocInfo::none);
+ __ emit_data64(0x0000000000000006, relocInfo::none);
+
+ return start;
+ }
+
// Non-destructive plausibility checks for oops
//
// Arguments:
@@ -5871,6 +5905,13 @@
StubRoutines::x86::_float_sign_flip = generate_fp_mask("float_sign_flip", 0x8000000080000000);
StubRoutines::x86::_double_sign_mask = generate_fp_mask("double_sign_mask", 0x7FFFFFFFFFFFFFFF);
StubRoutines::x86::_double_sign_flip = generate_fp_mask("double_sign_flip", 0x8000000000000000);
+ StubRoutines::x86::_vector_float_sign_mask = generate_vector_mask("vector_float_sign_mask", 0x7FFFFFFF7FFFFFFF);
+ StubRoutines::x86::_vector_float_sign_flip = generate_vector_mask("vector_float_sign_flip", 0x8000000080000000);
+ StubRoutines::x86::_vector_double_sign_mask = generate_vector_mask("vector_double_sign_mask", 0x7FFFFFFFFFFFFFFF);
+ StubRoutines::x86::_vector_double_sign_flip = generate_vector_mask("vector_double_sign_flip", 0x8000000000000000);
+ StubRoutines::x86::_vector_short_to_byte_mask = generate_vector_mask("vector_short_to_byte_mask", 0x00ff00ff00ff00ff);
+ StubRoutines::x86::_vector_byte_perm_mask = generate_vector_byte_perm_mask("vector_byte_perm_mask");
+ StubRoutines::x86::_vector_long_sign_mask = generate_vector_mask("vector_long_sign_mask", 0x8000000000000000);
// support for verify_oop (must happen after universe_init)
StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
--- a/src/hotspot/cpu/x86/stubRoutines_x86.cpp Tue May 07 21:53:46 2019 +0200
+++ b/src/hotspot/cpu/x86/stubRoutines_x86.cpp Tue May 07 13:33:27 2019 -0700
@@ -43,6 +43,13 @@
address StubRoutines::x86::_upper_word_mask_addr = NULL;
address StubRoutines::x86::_shuffle_byte_flip_mask_addr = NULL;
address StubRoutines::x86::_k256_adr = NULL;
+address StubRoutines::x86::_vector_short_to_byte_mask = NULL;
+address StubRoutines::x86::_vector_float_sign_mask = NULL;
+address StubRoutines::x86::_vector_float_sign_flip = NULL;
+address StubRoutines::x86::_vector_double_sign_mask = NULL;
+address StubRoutines::x86::_vector_double_sign_flip = NULL;
+address StubRoutines::x86::_vector_byte_perm_mask = NULL;
+address StubRoutines::x86::_vector_long_sign_mask = NULL;
#ifdef _LP64
address StubRoutines::x86::_k256_W_adr = NULL;
address StubRoutines::x86::_k512_W_addr = NULL;
--- a/src/hotspot/cpu/x86/stubRoutines_x86.hpp Tue May 07 21:53:46 2019 +0200
+++ b/src/hotspot/cpu/x86/stubRoutines_x86.hpp Tue May 07 13:33:27 2019 -0700
@@ -102,6 +102,7 @@
static address double_sign_flip() {
return _double_sign_flip;
}
+
#else // !LP64
private:
@@ -139,6 +140,13 @@
//k256 table for sha256
static juint _k256[];
static address _k256_adr;
+ static address _vector_short_to_byte_mask;
+ static address _vector_float_sign_mask;
+ static address _vector_float_sign_flip;
+ static address _vector_double_sign_mask;
+ static address _vector_double_sign_flip;
+ static address _vector_byte_perm_mask;
+ static address _vector_long_sign_mask;
#ifdef _LP64
static juint _k256_W[];
static address _k256_W_adr;
@@ -212,6 +220,33 @@
static address upper_word_mask_addr() { return _upper_word_mask_addr; }
static address shuffle_byte_flip_mask_addr() { return _shuffle_byte_flip_mask_addr; }
static address k256_addr() { return _k256_adr; }
+
+ static address vector_short_to_byte_mask() {
+ return _vector_short_to_byte_mask;
+ }
+ static address vector_float_sign_mask() {
+ return _vector_float_sign_mask;
+ }
+
+ static address vector_float_sign_flip() {
+ return _vector_float_sign_flip;
+ }
+
+ static address vector_double_sign_mask() {
+ return _vector_double_sign_mask;
+ }
+
+ static address vector_double_sign_flip() {
+ return _vector_double_sign_flip;
+ }
+
+ static address vector_byte_perm_mask() {
+ return _vector_byte_perm_mask;
+ }
+
+ static address vector_long_sign_mask() {
+ return _vector_long_sign_mask;
+ }
#ifdef _LP64
static address k256_W_addr() { return _k256_W_adr; }
static address k512_W_addr() { return _k512_W_addr; }
--- a/src/hotspot/cpu/x86/x86.ad Tue May 07 21:53:46 2019 +0200
+++ b/src/hotspot/cpu/x86/x86.ad Tue May 07 13:33:27 2019 -0700
@@ -1372,14 +1372,20 @@
static address double_signmask() { return (address)double_signmask_pool; }
static address double_signflip() { return (address)double_signflip_pool; }
#endif
-
-
+ static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
+ static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
+ static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
+
+//=============================================================================
const bool Matcher::match_rule_supported(int opcode) {
if (!has_match_rule(opcode))
return false;
bool ret_value = true;
switch (opcode) {
+ case Op_AbsVL:
+ if (UseAVX < 3) { ret_value = false; } // AbsVL (packed long abs) requires AVX-512 (EVEX) encoding
+ break; // break was missing: control fell through into the UsePopCountInstruction check below
case Op_PopCountI:
case Op_PopCountL:
if (!UsePopCountInstruction)
@@ -1402,6 +1408,9 @@
if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
ret_value = false;
break;
+ case Op_AbsVB:
+ case Op_AbsVS:
+ case Op_AbsVI:
case Op_AddReductionVI:
if (UseSSE < 3) // requires at least SSE3
ret_value = false;
@@ -1447,9 +1456,19 @@
ret_value = false;
break;
case Op_MulAddVS2VI:
+ case Op_RShiftVL:
+ case Op_AbsVD:
+ case Op_NegVD:
if (UseSSE < 2)
ret_value = false;
break;
+ case Op_MulVB:
+ case Op_LShiftVB:
+ case Op_RShiftVB:
+ case Op_URShiftVB:
+ if (UseSSE < 4)
+ ret_value = false;
+ break;
#ifdef _LP64
case Op_MaxD:
case Op_MaxF:
@@ -1470,24 +1489,42 @@
bool ret_value = match_rule_supported(opcode);
if (ret_value) {
switch (opcode) {
+ case Op_AbsVB:
case Op_AddVB:
case Op_SubVB:
if ((vlen == 64) && (VM_Version::supports_avx512bw() == false))
ret_value = false;
break;
- case Op_URShiftVS:
- case Op_RShiftVS:
- case Op_LShiftVS:
- case Op_MulVS:
+ case Op_AbsVS:
case Op_AddVS:
case Op_SubVS:
+ case Op_MulVS:
+ case Op_LShiftVS:
+ case Op_RShiftVS:
+ case Op_URShiftVS:
if ((vlen == 32) && (VM_Version::supports_avx512bw() == false))
ret_value = false;
break;
+ case Op_MulVB:
+ case Op_LShiftVB:
+ case Op_RShiftVB:
+ case Op_URShiftVB:
+ if ((vlen == 32 && UseAVX < 2) ||
+ ((vlen == 64) && (VM_Version::supports_avx512bw() == false)))
+ ret_value = false;
+ break;
+ case Op_NegVF:
+ if ((vlen == 16) && (VM_Version::supports_avx512dq() == false))
+ ret_value = false;
+ break;
case Op_CMoveVF:
if (vlen != 8)
ret_value = false;
break;
+ case Op_NegVD:
+ if ((vlen == 8) && (VM_Version::supports_avx512dq() == false))
+ ret_value = false;
+ break;
case Op_CMoveVD:
if (vlen != 4)
ret_value = false;
@@ -7302,6 +7339,186 @@
// --------------------------------- MUL --------------------------------------
+// Byte vector mul
+instruct mul4B_reg(vecS dst, vecS src1, vecS src2, vecS tmp, rRegI scratch) %{
+ predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
+ match(Set dst (MulVB src1 src2));
+ effect(TEMP dst, TEMP tmp, TEMP scratch);
+ format %{"pmovsxbw $tmp,$src1\n\t"
+ "pmovsxbw $dst,$src2\n\t"
+ "pmullw $tmp,$dst\n\t"
+ "movdqu $dst,[0x00ff00ff00ff00ff]\n\t"
+ "pand $dst,$tmp\n\t"
+ "packuswb $dst,$dst\t! mul packed4B" %}
+ ins_encode %{
+ __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister);
+ __ pmovsxbw($dst$$XMMRegister, $src2$$XMMRegister);
+ __ pmullw($tmp$$XMMRegister, $dst$$XMMRegister);
+ __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
+ __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
+ __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct mul8B_reg(vecD dst, vecD src1, vecD src2, vecD tmp, rRegI scratch) %{
+ predicate(UseSSE > 3 && n->as_Vector()->length() == 8);
+ match(Set dst (MulVB src1 src2));
+ effect(TEMP dst, TEMP tmp, TEMP scratch);
+ format %{"pmovsxbw $tmp,$src1\n\t"
+ "pmovsxbw $dst,$src2\n\t"
+ "pmullw $tmp,$dst\n\t"
+ "movdqu $dst,[0x00ff00ff00ff00ff]\n\t"
+ "pand $dst,$tmp\n\t"
+ "packuswb $dst,$dst\t! mul packed8B" %}
+ ins_encode %{
+ __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister);
+ __ pmovsxbw($dst$$XMMRegister, $src2$$XMMRegister);
+ __ pmullw($tmp$$XMMRegister, $dst$$XMMRegister);
+ __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
+ __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
+ __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct mul16B_reg(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2, rRegI scratch) %{
+ predicate(UseSSE > 3 && UseAVX <= 1 && n->as_Vector()->length() == 16);
+ match(Set dst (MulVB src1 src2));
+ effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
+ format %{"pmovsxbw $tmp1,$src1\n\t"
+ "pmovsxbw $tmp2,$src2\n\t"
+ "pmullw $tmp1,$tmp2\n\t"
+ "pshufd $tmp2,$src1,0xEE\n\t"
+ "pshufd $dst,$src2,0xEE\n\t"
+ "pmovsxbw $tmp2,$tmp2\n\t"
+ "pmovsxbw $dst,$dst\n\t"
+ "pmullw $tmp2,$dst\n\t"
+ "movdqu $dst,[0x00ff00ff00ff00ff]\n\t"
+ "pand $tmp2,$dst\n\t"
+ "pand $dst,$tmp1\n\t"
+ "packuswb $dst,$tmp2\t! mul packed16B" %}
+ ins_encode %{
+ __ pmovsxbw($tmp1$$XMMRegister, $src1$$XMMRegister);
+ __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister);
+ __ pmullw($tmp1$$XMMRegister, $tmp2$$XMMRegister);
+ __ pshufd($tmp2$$XMMRegister, $src1$$XMMRegister, 0xEE);
+ __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xEE);
+ __ pmovsxbw($tmp2$$XMMRegister, $tmp2$$XMMRegister);
+ __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister);
+ __ pmullw($tmp2$$XMMRegister, $dst$$XMMRegister);
+ __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
+ __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
+ __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
+ __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul16B_reg_avx(vecX dst, vecX src1, vecX src2, vecX tmp, rRegI scratch) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+ match(Set dst (MulVB src1 src2));
+ effect(TEMP dst, TEMP tmp, TEMP scratch);
+ format %{"vpmovsxbw $tmp,$src1\n\t"
+ "vpmovsxbw $dst,$src2\n\t"
+ "vpmullw $tmp,$tmp,$dst\n\t"
+ "vmovdqu $dst,[0x00ff00ff00ff00ff]\n\t"
+ "vpand $dst,$dst,$tmp\n\t"
+ "vextracti128_high $tmp,$dst\n\t"
+ "vpackuswb $dst,$dst,$dst\n\t! mul packed16B" %}
+ ins_encode %{
+ int vector_len = 1;
+ __ vpmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister, vector_len);
+ __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len);
+ __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
+ __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+ __ vextracti128_high($tmp$$XMMRegister, $dst$$XMMRegister);
+ __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, 0);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul32B_reg_avx(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2, rRegI scratch) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
+ match(Set dst (MulVB src1 src2));
+ effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
+ format %{"vextracti128_high $tmp1,$src1\n\t"
+ "vextracti128_high $dst,$src2\n\t"
+ "vpmovsxbw $tmp1,$tmp1\n\t"
+ "vpmovsxbw $dst,$dst\n\t"
+ "vpmullw $tmp1,$tmp1,$dst\n\t"
+ "vpmovsxbw $tmp2,$src1\n\t"
+ "vpmovsxbw $dst,$src2\n\t"
+ "vpmullw $tmp2,$tmp2,$dst\n\t"
+ "vmovdqu $dst, [0x00ff00ff00ff00ff]\n\t"
+ "vpbroadcastd $dst, $dst\n\t"
+ "vpand $tmp1,$tmp1,$dst\n\t"
+ "vpand $dst,$dst,$tmp2\n\t"
+ "vpackuswb $dst,$dst,$tmp1\n\t"
+ "vpermq $dst, $dst, 0xD8\t! mul packed32B" %}
+ ins_encode %{
+ int vector_len = 1;
+ __ vextracti128_high($tmp1$$XMMRegister, $src1$$XMMRegister);
+ __ vextracti128_high($dst$$XMMRegister, $src2$$XMMRegister);
+ __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len);
+ __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len);
+ __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len);
+ __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
+ __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len);
+ __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp1$$XMMRegister, vector_len);
+ __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul64B_reg_avx(vecZ dst, vecZ src1, vecZ src2, vecZ tmp1, vecZ tmp2, rRegI scratch) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
+ match(Set dst (MulVB src1 src2));
+ effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
+ format %{"vextracti64x4_high $tmp1,$src1\n\t"
+ "vextracti64x4_high $dst,$src2\n\t"
+ "vpmovsxbw $tmp1,$tmp1\n\t"
+ "vpmovsxbw $dst,$dst\n\t"
+ "vpmullw $tmp1,$tmp1,$dst\n\t"
+ "vpmovsxbw $tmp2,$src1\n\t"
+ "vpmovsxbw $dst,$src2\n\t"
+ "vpmullw $tmp2,$tmp2,$dst\n\t"
+ "vmovdqu $dst, [0x00ff00ff00ff00ff]\n\t"
+ "vpbroadcastd $dst, $dst\n\t"
+ "vpand $tmp1,$tmp1,$dst\n\t"
+ "vpand $tmp2,$tmp2,$dst\n\t"
+ "vpackuswb $dst,$tmp1,$tmp2\n\t"
+ "evmovdquq $tmp2,[0x0604020007050301]\n\t"
+ "vpermq $dst,$tmp2,$dst,0x01\t! mul packed64B" %}
+
+ ins_encode %{
+ int vector_len = 2;
+ __ vextracti64x4_high($tmp1$$XMMRegister, $src1$$XMMRegister);
+ __ vextracti64x4_high($dst$$XMMRegister, $src2$$XMMRegister);
+ __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len);
+ __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len);
+ __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len);
+ __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
+ __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vector_len);
+ __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register);
+ __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
+
+ %}
+ ins_pipe( pipe_slow );
+%}
+
// Shorts/Chars vector mul
instruct vmul2S(vecS dst, vecS src) %{
predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
@@ -8024,20 +8241,6 @@
ins_pipe( pipe_slow );
%}
-// ------------------------------ Shift ---------------------------------------
-
-// Left and right shift count vectors are the same on x86
-// (only lowest bits of xmm reg are used for count).
-instruct vshiftcnt(vecS dst, rRegI cnt) %{
- match(Set dst (LShiftCntV cnt));
- match(Set dst (RShiftCntV cnt));
- format %{ "movd $dst,$cnt\t! load shift count" %}
- ins_encode %{
- __ movdl($dst$$XMMRegister, $cnt$$Register);
- %}
- ins_pipe( pipe_slow );
-%}
-
// --------------------------------- Sqrt --------------------------------------
// Floating point vector sqrt
@@ -8195,1092 +8398,478 @@
ins_pipe( pipe_slow );
%}
-// ------------------------------ LeftShift -----------------------------------
-
-// Shorts/Chars vector left shift
-instruct vsll2S(vecS dst, vecS shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
- match(Set dst (LShiftVS dst shift));
- format %{ "psllw $dst,$shift\t! left shift packed2S" %}
- ins_encode %{
- __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll2S_imm(vecS dst, immI8 shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
- match(Set dst (LShiftVS dst shift));
- format %{ "psllw $dst,$shift\t! left shift packed2S" %}
- ins_encode %{
- __ psllw($dst$$XMMRegister, (int)$shift$$constant);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
- match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
- match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll4S(vecD dst, vecS shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
- match(Set dst (LShiftVS dst shift));
- format %{ "psllw $dst,$shift\t! left shift packed4S" %}
- ins_encode %{
- __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll4S_imm(vecD dst, immI8 shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
- match(Set dst (LShiftVS dst shift));
- format %{ "psllw $dst,$shift\t! left shift packed4S" %}
- ins_encode %{
- __ psllw($dst$$XMMRegister, (int)$shift$$constant);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
- match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
- match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll8S(vecX dst, vecS shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
- match(Set dst (LShiftVS dst shift));
- format %{ "psllw $dst,$shift\t! left shift packed8S" %}
- ins_encode %{
- __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll8S_imm(vecX dst, immI8 shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
- match(Set dst (LShiftVS dst shift));
- format %{ "psllw $dst,$shift\t! left shift packed8S" %}
- ins_encode %{
- __ psllw($dst$$XMMRegister, (int)$shift$$constant);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
- match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
- match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
+// ------------------------------ Shift ---------------------------------------
+
+// Left and right shift count vectors are the same on x86
+// (only lowest bits of xmm reg are used for count).
+instruct vshiftcnt(vecS dst, rRegI cnt) %{
+ match(Set dst (LShiftCntV cnt));
+ match(Set dst (RShiftCntV cnt));
+ format %{ "movdl $dst,$cnt\t! load shift count" %}
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $cnt$$Register);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vshiftcntimm(vecS dst, immI8 cnt, rRegI tmp) %{
+ match(Set dst cnt);
+ effect(TEMP tmp);
+ format %{ "movl $tmp,$cnt\t"
+ "movdl $dst,$tmp\t! load shift count" %}
+ ins_encode %{
+ __ movl($tmp$$Register, $cnt$$constant);
+ __ movdl($dst$$XMMRegister, $tmp$$Register);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Byte vector shift
+instruct vshift4B(vecS dst, vecS src, vecS shift, vecS tmp, rRegI scratch) %{
+ predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
+ match(Set dst (LShiftVB src shift));
+ match(Set dst (RShiftVB src shift));
+ match(Set dst (URShiftVB src shift));
+ effect(TEMP dst, TEMP tmp, TEMP scratch);
+ format %{"vextendbw $tmp,$src\n\t"
+ "vshiftw $tmp,$shift\n\t"
+ "movdqu $dst,[0x00ff00ff00ff00ff]\n\t"
+ "pand $dst,$tmp\n\t"
+ "packuswb $dst,$dst\n\t ! packed4B shift" %}
+ ins_encode %{
+ int opcode = this->as_Mach()->ideal_Opcode();
+
+ __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister);
+ __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
+ __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
+ __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
+ __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vshift8B(vecD dst, vecD src, vecS shift, vecD tmp, rRegI scratch) %{
+ predicate(UseSSE > 3 && n->as_Vector()->length() == 8);
+ match(Set dst (LShiftVB src shift));
+ match(Set dst (RShiftVB src shift));
+ match(Set dst (URShiftVB src shift));
+ effect(TEMP dst, TEMP tmp, TEMP scratch);
+ format %{"vextendbw $tmp,$src\n\t"
+ "vshiftw $tmp,$shift\n\t"
+ "movdqu $dst,[0x00ff00ff00ff00ff]\n\t"
+ "pand $dst,$tmp\n\t"
+ "packuswb $dst,$dst\n\t ! packed8B shift" %}
+ ins_encode %{
+ int opcode = this->as_Mach()->ideal_Opcode();
+
+ __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister);
+ __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
+ __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
+ __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
+ __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vshift16B(vecX dst, vecX src, vecS shift, vecX tmp1, vecX tmp2, rRegI scratch) %{
+ predicate(UseSSE > 3 && UseAVX <= 1 && n->as_Vector()->length() == 16);
+ match(Set dst (LShiftVB src shift));
+ match(Set dst (RShiftVB src shift));
+ match(Set dst (URShiftVB src shift));
+ effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
+ format %{"vextendbw $tmp1,$src\n\t"
+ "vshiftw $tmp1,$shift\n\t"
+ "pshufd $tmp2,$src\n\t"
+ "vextendbw $tmp2,$tmp2\n\t"
+ "vshiftw $tmp2,$shift\n\t"
+ "movdqu $dst,[0x00ff00ff00ff00ff]\n\t"
+ "pand $tmp2,$dst\n\t"
+ "pand $dst,$tmp1\n\t"
+ "packuswb $dst,$tmp2\n\t! packed16B shift" %}
+ ins_encode %{
+ int opcode = this->as_Mach()->ideal_Opcode();
+
+ __ vextendbw(opcode, $tmp1$$XMMRegister, $src$$XMMRegister);
+ __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
+ __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
+ __ vextendbw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
+ __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
+ __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
+ __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
+ __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
+ __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vshift16B_avx(vecX dst, vecX src, vecS shift, vecX tmp, rRegI scratch) %{
predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
- match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
- predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
- match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
- ins_encode %{
+ match(Set dst (LShiftVB src shift));
+ match(Set dst (RShiftVB src shift));
+ match(Set dst (URShiftVB src shift));
+ effect(TEMP dst, TEMP tmp, TEMP scratch);
+ format %{"vextendbw $tmp,$src\n\t"
+ "vshiftw $tmp,$tmp,$shift\n\t"
+ "vpand $tmp,$tmp,[0x00ff00ff00ff00ff]\n\t"
+ "vextracti128_high $dst,$tmp\n\t"
+ "vpackuswb $dst,$tmp,$dst\n\t! packed16B shift" %}
+ ins_encode %{
+ int opcode = this->as_Mach()->ideal_Opcode();
+
int vector_len = 1;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
- predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
- match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
- ins_encode %{
- int vector_len = 2;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
- predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
- match(Set dst (LShiftVS src shift));
- format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
- ins_encode %{
+ __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister, vector_len);
+ __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
+ __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
+ __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
+ __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vshift32B_avx(vecY dst, vecY src, vecS shift, vecY tmp, rRegI scratch) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
+ match(Set dst (LShiftVB src shift));
+ match(Set dst (RShiftVB src shift));
+ match(Set dst (URShiftVB src shift));
+ effect(TEMP dst, TEMP tmp, TEMP scratch);
+ format %{"vextracti128_high $tmp,$src\n\t"
+ "vextendbw $tmp,$tmp\n\t"
+ "vextendbw $dst,$src\n\t"
+ "vshiftw $tmp,$tmp,$shift\n\t"
+ "vshiftw $dst,$dst,$shift\n\t"
+ "vpand $tmp,$tmp,[0x00ff00ff00ff00ff]\n\t"
+ "vpand $dst,$dst,[0x00ff00ff00ff00ff]\n\t"
+ "vpackuswb $dst,$dst,$tmp\n\t"
+ "vpermq $dst,$dst,0xD8\n\t! packed32B shift" %}
+ ins_encode %{
+ int opcode = this->as_Mach()->ideal_Opcode();
+
+ int vector_len = 1;
+ __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
+ __ vextendbw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
+ __ vextendbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len);
+ __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
+ __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vector_len);
+ __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
+ __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
+ __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+ __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vshift64B_avx(vecZ dst, vecZ src, vecS shift, vecZ tmp1, vecZ tmp2, rRegI scratch) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
+ match(Set dst (LShiftVB src shift));
+ match(Set dst (RShiftVB src shift));
+ match(Set dst (URShiftVB src shift));
+ effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
+ format %{"vextracti64x4 $tmp1,$src\n\t"
+ "vextendbw $tmp1,$tmp1\n\t"
+ "vextendbw $tmp2,$src\n\t"
+ "vshiftw $tmp1,$tmp1,$shift\n\t"
+ "vshiftw $tmp2,$tmp2,$shift\n\t"
+ "vmovdqu $dst,[0x00ff00ff00ff00ff]\n\t"
+ "vpbroadcastd $dst,$dst\n\t"
+ "vpand $tmp1,$tmp1,$dst\n\t"
+ "vpand $tmp2,$tmp2,$dst\n\t"
+ "vpackuswb $dst,$tmp1,$tmp2\n\t"
+ "evmovdquq $tmp2, [0x0604020007050301]\n\t"
+ "vpermq $dst,$tmp2,$dst\n\t! packed64B shift" %}
+ ins_encode %{
+ int opcode = this->as_Mach()->ideal_Opcode();
+
int vector_len = 2;
- __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-// Integers vector left shift
-instruct vsll2I(vecD dst, vecS shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
- match(Set dst (LShiftVI dst shift));
- format %{ "pslld $dst,$shift\t! left shift packed2I" %}
- ins_encode %{
- __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll2I_imm(vecD dst, immI8 shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
- match(Set dst (LShiftVI dst shift));
- format %{ "pslld $dst,$shift\t! left shift packed2I" %}
- ins_encode %{
- __ pslld($dst$$XMMRegister, (int)$shift$$constant);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
- match(Set dst (LShiftVI src shift));
- format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
- ins_encode %{
- int vector_len = 0;
- __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
- match(Set dst (LShiftVI src shift));
- format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
- ins_encode %{
- int vector_len = 0;
- __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll4I(vecX dst, vecS shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
- match(Set dst (LShiftVI dst shift));
- format %{ "pslld $dst,$shift\t! left shift packed4I" %}
- ins_encode %{
- __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll4I_imm(vecX dst, immI8 shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
- match(Set dst (LShiftVI dst shift));
- format %{ "pslld $dst,$shift\t! left shift packed4I" %}
- ins_encode %{
- __ pslld($dst$$XMMRegister, (int)$shift$$constant);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
- match(Set dst (LShiftVI src shift));
- format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
- ins_encode %{
- int vector_len = 0;
- __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
- match(Set dst (LShiftVI src shift));
- format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
- ins_encode %{
- int vector_len = 0;
- __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
- predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
- match(Set dst (LShiftVI src shift));
- format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
- ins_encode %{
- int vector_len = 1;
- __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
- predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
- match(Set dst (LShiftVI src shift));
- format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
- ins_encode %{
- int vector_len = 1;
- __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
- match(Set dst (LShiftVI src shift));
- format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
- ins_encode %{
- int vector_len = 2;
- __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
- match(Set dst (LShiftVI src shift));
- format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
- ins_encode %{
- int vector_len = 2;
- __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-// Longs vector left shift
-instruct vsll2L(vecX dst, vecS shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
- match(Set dst (LShiftVL dst shift));
- format %{ "psllq $dst,$shift\t! left shift packed2L" %}
- ins_encode %{
- __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll2L_imm(vecX dst, immI8 shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
- match(Set dst (LShiftVL dst shift));
- format %{ "psllq $dst,$shift\t! left shift packed2L" %}
- ins_encode %{
- __ psllq($dst$$XMMRegister, (int)$shift$$constant);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
- match(Set dst (LShiftVL src shift));
- format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
- match(Set dst (LShiftVL src shift));
- format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
- predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
- match(Set dst (LShiftVL src shift));
- format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
- predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
- match(Set dst (LShiftVL src shift));
- format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
- match(Set dst (LShiftVL src shift));
- format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
- ins_encode %{
- int vector_len = 2;
- __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
- match(Set dst (LShiftVL src shift));
- format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
- ins_encode %{
- int vector_len = 2;
- __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-// ----------------------- LogicalRightShift -----------------------------------
+ __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
+ __ vextendbw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len);
+ __ vextendbw(opcode, $tmp2$$XMMRegister, $src$$XMMRegister, vector_len);
+ __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vector_len);
+ __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vector_len);
+ __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
+ __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vector_len);
+ __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register);
+ __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
// Shorts vector logical right shift produces incorrect Java result
// for negative data because java code convert short value into int with
// sign extension before a shift. But char vectors are fine since chars are
// unsigned values.
-
-instruct vsrl2S(vecS dst, vecS shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
- match(Set dst (URShiftVS dst shift));
- format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
- ins_encode %{
- __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl2S_imm(vecS dst, immI8 shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
- match(Set dst (URShiftVS dst shift));
- format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
- ins_encode %{
- __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+// Shorts/Chars vector left shift
+instruct vshift2S(vecS dst, vecS src, vecS shift) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (LShiftVS src shift));
+ match(Set dst (RShiftVS src shift));
 match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
- match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl4S(vecD dst, vecS shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
- match(Set dst (URShiftVS dst shift));
- format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
- ins_encode %{
- __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl4S_imm(vecD dst, immI8 shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
- match(Set dst (URShiftVS dst shift));
- format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
- ins_encode %{
- __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ format %{ "vshiftw $dst,$src,$shift\t! shift packed2S" %}
+ ins_encode %{
+ int opcode = this->as_Mach()->ideal_Opcode();
+ if (UseAVX == 0) {
+ if ($dst$$XMMRegister != $src$$XMMRegister)
+ __ movflt($dst$$XMMRegister, $src$$XMMRegister);
+ __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
+ } else {
+ int vector_len = 0;
+ __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ }
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vshift4S(vecD dst, vecD src, vecS shift) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (LShiftVS src shift));
+ match(Set dst (RShiftVS src shift));
match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ format %{ "vshiftw $dst,$src,$shift\t! shift packed4S" %}
+ ins_encode %{
+ int opcode = this->as_Mach()->ideal_Opcode();
+ if (UseAVX == 0) {
+ if ($dst$$XMMRegister != $src$$XMMRegister)
+ __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
+ __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
+
+ } else {
+ int vector_len = 0;
+ __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ }
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vshift8S(vecX dst, vecX src, vecS shift) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (LShiftVS src shift));
+ match(Set dst (RShiftVS src shift));
match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl8S(vecX dst, vecS shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
- match(Set dst (URShiftVS dst shift));
- format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
- ins_encode %{
- __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl8S_imm(vecX dst, immI8 shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
- match(Set dst (URShiftVS dst shift));
- format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
- ins_encode %{
- __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+ format %{ "vshiftw $dst,$src,$shift\t! shift packed8S" %}
+ ins_encode %{
+ int opcode = this->as_Mach()->ideal_Opcode();
+ if (UseAVX == 0) {
+ if ($dst$$XMMRegister != $src$$XMMRegister)
+ __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
+ __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
+ } else {
+ int vector_len = 0;
+ __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ }
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vshift16S(vecY dst, vecY src, vecS shift) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+ match(Set dst (LShiftVS src shift));
+ match(Set dst (RShiftVS src shift));
match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
- match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
- predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
- match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
+ format %{ "vshiftw $dst,$src,$shift\t! shift packed16S" %}
ins_encode %{
int vector_len = 1;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
- predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+ int opcode = this->as_Mach()->ideal_Opcode();
+ __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vshift32S(vecZ dst, vecZ src, vecS shift) %{
+ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+ match(Set dst (LShiftVS src shift));
+ match(Set dst (RShiftVS src shift));
match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
- predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
- match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
- ins_encode %{
- int vector_len = 2;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
- predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
- match(Set dst (URShiftVS src shift));
- format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
+ format %{ "vshiftw $dst,$src,$shift\t! shift packed32S" %}
ins_encode %{
int vector_len = 2;
- __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-// Integers vector logical right shift
-instruct vsrl2I(vecD dst, vecS shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
- match(Set dst (URShiftVI dst shift));
- format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
- ins_encode %{
- __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl2I_imm(vecD dst, immI8 shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
- match(Set dst (URShiftVI dst shift));
- format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
- ins_encode %{
- __ psrld($dst$$XMMRegister, (int)$shift$$constant);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
- match(Set dst (URShiftVI src shift));
- format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+ int opcode = this->as_Mach()->ideal_Opcode();
+ __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Integers vector left shift
+instruct vshift2I(vecD dst, vecD src, vecS shift) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (LShiftVI src shift));
+ match(Set dst (RShiftVI src shift));
match(Set dst (URShiftVI src shift));
- format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl4I(vecX dst, vecS shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
- match(Set dst (URShiftVI dst shift));
- format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
- ins_encode %{
- __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl4I_imm(vecX dst, immI8 shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
- match(Set dst (URShiftVI dst shift));
- format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
- ins_encode %{
- __ psrld($dst$$XMMRegister, (int)$shift$$constant);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ format %{ "vshiftd $dst,$src,$shift\t! shift packed2I" %}
+ ins_encode %{
+ int opcode = this->as_Mach()->ideal_Opcode();
+ if (UseAVX == 0) {
+ if ($dst$$XMMRegister != $src$$XMMRegister)
+ __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
+ __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
+ } else {
+ int vector_len = 0;
+ __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ }
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vshift4I(vecX dst, vecX src, vecS shift) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (LShiftVI src shift));
+ match(Set dst (RShiftVI src shift));
match(Set dst (URShiftVI src shift));
- format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ format %{ "vshiftd $dst,$src,$shift\t! shift packed4I" %}
+ ins_encode %{
+ int opcode = this->as_Mach()->ideal_Opcode();
+ if (UseAVX == 0) {
+ if ($dst$$XMMRegister != $src$$XMMRegister)
+ __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
+ __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
+ } else {
+ int vector_len = 0;
+ __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ }
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vshift8I(vecY dst, vecY src, vecS shift) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+ match(Set dst (LShiftVI src shift));
+ match(Set dst (RShiftVI src shift));
match(Set dst (URShiftVI src shift));
- format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
- predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
- match(Set dst (URShiftVI src shift));
- format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
+ format %{ "vshiftd $dst,$src,$shift\t! shift packed8I" %}
ins_encode %{
int vector_len = 1;
- __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
- predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+ int opcode = this->as_Mach()->ideal_Opcode();
+ __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vshift16I(vecZ dst, vecZ src, vecS shift) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
+ match(Set dst (LShiftVI src shift));
+ match(Set dst (RShiftVI src shift));
match(Set dst (URShiftVI src shift));
- format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
- match(Set dst (URShiftVI src shift));
- format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
- ins_encode %{
- int vector_len = 2;
- __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
- match(Set dst (URShiftVI src shift));
- format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
+ format %{ "vshiftd $dst,$src,$shift\t! shift packed16I" %}
ins_encode %{
int vector_len = 2;
- __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-// Longs vector logical right shift
-instruct vsrl2L(vecX dst, vecS shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
- match(Set dst (URShiftVL dst shift));
- format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
- ins_encode %{
+ int opcode = this->as_Mach()->ideal_Opcode();
+ __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Longs vector shift
+// Note: RShiftVL (arithmetic right shift) is deliberately NOT matched here.
+// SSE/AVX2 have no packed-quadword arithmetic right shift instruction, so it
+// is handled separately by the vsra2L_reg / vsra2L_reg_evex rules below.
+instruct vshift2L(vecX dst, vecX src, vecS shift) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (LShiftVL src shift));
+ match(Set dst (URShiftVL src shift));
+ format %{ "vshiftq $dst,$src,$shift\t! shift packed2L" %}
+ ins_encode %{
+ int opcode = this->as_Mach()->ideal_Opcode();
+ if (UseAVX == 0) {
+ // SSE forms are destructive (dst op= shift); copy src into dst first.
+ if ($dst$$XMMRegister != $src$$XMMRegister)
+ __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
+ __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
+ } else {
+ int vector_len = 0;
+ __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ }
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// 256-bit long shifts: handles both left (LShiftVL) and logical right
+// (URShiftVL) shifts; the encoder dispatches on the ideal opcode.
+// RShiftVL is handled separately (vsra4L_reg / vsra4L_reg_evex) since
+// AVX2 has no packed-quadword arithmetic right shift.
+instruct vshift4L(vecY dst, vecY src, vecS shift) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+ match(Set dst (LShiftVL src shift));
+ match(Set dst (URShiftVL src shift));
+ format %{ "vshiftq $dst,$src,$shift\t! shift packed4L" %}
+ ins_encode %{
+ int vector_len = 1;
+ int opcode = this->as_Mach()->ideal_Opcode();
+ __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// 512-bit long shifts: unlike the 128/256-bit rules, RShiftVL is matched
+// here as well — the predicate requires UseAVX > 2, and the encoder
+// dispatches on the ideal opcode (AVX-512 supplies an arithmetic quadword
+// right shift; see vsra2L_reg_evex below which emits evpsraq).
+instruct vshift8L(vecZ dst, vecZ src, vecS shift) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
+ match(Set dst (LShiftVL src shift));
+ match(Set dst (RShiftVL src shift));
+ match(Set dst (URShiftVL src shift));
+ format %{ "vshiftq $dst,$src,$shift\t! shift packed8L" %}
+ ins_encode %{
+ int vector_len = 2;
+ int opcode = this->as_Mach()->ideal_Opcode();
+ __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// -------------------ArithmeticRightShift -----------------------------------
+// Long vector arithmetic right shift
+instruct vsra2L_reg(vecX dst, vecX src, vecS shift, vecX tmp, rRegI scratch) %{
+ predicate(UseSSE >= 2 && n->as_Vector()->length() == 2);
+ match(Set dst (RShiftVL src shift));
+ effect(TEMP dst, TEMP tmp, TEMP scratch);
+ format %{ "movdqu $dst,$src\n\t"
+ "psrlq $dst,$shift\n\t"
+ "movdqu $tmp,[0x8000000000000000]\n\t"
+ "psrlq $tmp,$shift\n\t"
+ "pxor $dst,$tmp\n\t"
+ "psubq $dst,$tmp\t! arithmetic right shift packed2L" %}
+ ins_encode %{
+ __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
__ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl2L_imm(vecX dst, immI8 shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
- match(Set dst (URShiftVL dst shift));
- format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
- ins_encode %{
- __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
- match(Set dst (URShiftVL src shift));
- format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
- match(Set dst (URShiftVL src shift));
- format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
+ __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register);
+ __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
+ __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
+ __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// AVX-512 variant of packed2L arithmetic right shift: emits evpsraq
+// directly instead of the 4-instruction psrlq/pxor/psubq sign-fixup
+// sequence used by vsra2L_reg above. Predicate (UseAVX > 2) takes
+// priority over the SSE rule when AVX-512 is available.
+// NOTE(review): 128-bit evpsraq presumably needs AVX512VL in addition to
+// UseAVX > 2 — confirm VM_Version guarantees VL when UseAVX > 2.
+instruct vsra2L_reg_evex(vecX dst, vecX src, vecS shift) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 2);
+ match(Set dst (RShiftVL src shift));
+ format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed2L" %}
+ ins_encode %{
+ int vector_len = 0;
+ __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra4L_reg(vecY dst, vecY src, vecS shift, vecY tmp, rRegI scratch) %{
predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
- match(Set dst (URShiftVL src shift));
- format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
+ match(Set dst (RShiftVL src shift));
+ effect(TEMP dst, TEMP tmp, TEMP scratch);
+ format %{ "vpsrlq $dst,$src,$shift\n\t"
+ "vmovdqu $tmp,[0x8000000000000000]\n\t"
+ "vpsrlq $tmp,$tmp,$shift\n\t"
+ "vpxor $dst,$dst,$tmp\n\t"
+ "vpsubq $dst,$dst,$tmp\t! arithmetic right shift packed4L" %}
ins_encode %{
int vector_len = 1;
__ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
- predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
- match(Set dst (URShiftVL src shift));
- format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
+ __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register);
+ __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
+ __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+ __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vsra4L_reg_evex(vecY dst, vecY src, vecS shift) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 4);
+ match(Set dst (RShiftVL src shift));
+ format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed4L" %}
ins_encode %{
int vector_len = 1;
- __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
- match(Set dst (URShiftVL src shift));
- format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
- ins_encode %{
- int vector_len = 2;
- __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
- match(Set dst (URShiftVL src shift));
- format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
- ins_encode %{
- int vector_len = 2;
- __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-// ------------------- ArithmeticRightShift -----------------------------------
-
-// Shorts/Chars vector arithmetic right shift
-instruct vsra2S(vecS dst, vecS shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
- match(Set dst (RShiftVS dst shift));
- format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
- ins_encode %{
- __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra2S_imm(vecS dst, immI8 shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
- match(Set dst (RShiftVS dst shift));
- format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
- ins_encode %{
- __ psraw($dst$$XMMRegister, (int)$shift$$constant);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
- match(Set dst (RShiftVS src shift));
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
- match(Set dst (RShiftVS src shift));
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra4S(vecD dst, vecS shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
- match(Set dst (RShiftVS dst shift));
- format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
- ins_encode %{
- __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra4S_imm(vecD dst, immI8 shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
- match(Set dst (RShiftVS dst shift));
- format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
- ins_encode %{
- __ psraw($dst$$XMMRegister, (int)$shift$$constant);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
- match(Set dst (RShiftVS src shift));
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
- match(Set dst (RShiftVS src shift));
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra8S(vecX dst, vecS shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
- match(Set dst (RShiftVS dst shift));
- format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
- ins_encode %{
- __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra8S_imm(vecX dst, immI8 shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
- match(Set dst (RShiftVS dst shift));
- format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
- ins_encode %{
- __ psraw($dst$$XMMRegister, (int)$shift$$constant);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
- match(Set dst (RShiftVS src shift));
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
- match(Set dst (RShiftVS src shift));
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
- predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
- match(Set dst (RShiftVS src shift));
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
- predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
- match(Set dst (RShiftVS src shift));
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
- predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
- match(Set dst (RShiftVS src shift));
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
- ins_encode %{
- int vector_len = 2;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
- predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
- match(Set dst (RShiftVS src shift));
- format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
- ins_encode %{
- int vector_len = 2;
- __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-// Integers vector arithmetic right shift
-instruct vsra2I(vecD dst, vecS shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
- match(Set dst (RShiftVI dst shift));
- format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
- ins_encode %{
- __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra2I_imm(vecD dst, immI8 shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
- match(Set dst (RShiftVI dst shift));
- format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
- ins_encode %{
- __ psrad($dst$$XMMRegister, (int)$shift$$constant);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
- match(Set dst (RShiftVI src shift));
- format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
- match(Set dst (RShiftVI src shift));
- format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra4I(vecX dst, vecS shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
- match(Set dst (RShiftVI dst shift));
- format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
- ins_encode %{
- __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra4I_imm(vecX dst, immI8 shift) %{
- predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
- match(Set dst (RShiftVI dst shift));
- format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
- ins_encode %{
- __ psrad($dst$$XMMRegister, (int)$shift$$constant);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
- match(Set dst (RShiftVI src shift));
- format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
- match(Set dst (RShiftVI src shift));
- format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
- ins_encode %{
- int vector_len = 0;
- __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
- predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
- match(Set dst (RShiftVI src shift));
- format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
- predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
- match(Set dst (RShiftVI src shift));
- format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
- ins_encode %{
- int vector_len = 1;
- __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
- match(Set dst (RShiftVI src shift));
- format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
- ins_encode %{
- int vector_len = 2;
- __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
- predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
- match(Set dst (RShiftVI src shift));
- format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
- ins_encode %{
- int vector_len = 2;
- __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
- %}
- ins_pipe( pipe_slow );
-%}
-
-// There are no longs vector arithmetic right shift instructions.
-
+ __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
// --------------------------------- AND --------------------------------------
@@ -9708,6 +9297,291 @@
ins_pipe( pipe_slow );
%}
+// --------------------------------- ABS --------------------------------------
+// a = |a|
+instruct vabs4B_reg(vecS dst, vecS src) %{
+ predicate(UseSSE > 2 && n->as_Vector()->length() == 4);
+ match(Set dst (AbsVB src));
+ format %{ "pabsb $dst,$src\t# $dst = |$src| abs packed4B" %}
+ ins_encode %{
+ __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vabs8B_reg(vecD dst, vecD src) %{
+ predicate(UseSSE > 2 && n->as_Vector()->length() == 8);
+ match(Set dst (AbsVB src));
+ format %{ "pabsb $dst,$src\t# $dst = |$src| abs packed8B" %}
+ ins_encode %{
+ __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vabs16B_reg(vecX dst, vecX src) %{
+ predicate(UseSSE > 2 && n->as_Vector()->length() == 16);
+ match(Set dst (AbsVB src));
+ format %{ "pabsb $dst,$src\t# $dst = |$src| abs packed16B" %}
+ ins_encode %{
+ __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vabs32B_reg(vecY dst, vecY src) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
+ match(Set dst (AbsVB src));
+ format %{ "vpabsb $dst,$src\t# $dst = |$src| abs packed32B" %}
+ ins_encode %{
+ int vector_len = 1;
+ __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vabs64B_reg(vecZ dst, vecZ src) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
+ match(Set dst (AbsVB src));
+ format %{ "vpabsb $dst,$src\t# $dst = |$src| abs packed64B" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vabs2S_reg(vecD dst, vecD src) %{
+ predicate(UseSSE > 2 && n->as_Vector()->length() == 2);
+ match(Set dst (AbsVS src));
+ format %{ "pabsw $dst,$src\t# $dst = |$src| abs packed2S" %}
+ ins_encode %{
+ __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vabs4S_reg(vecD dst, vecD src) %{
+ predicate(UseSSE > 2 && n->as_Vector()->length() == 4);
+ match(Set dst (AbsVS src));
+ format %{ "pabsw $dst,$src\t# $dst = |$src| abs packed4S" %}
+ ins_encode %{
+ __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vabs8S_reg(vecX dst, vecX src) %{
+ predicate(UseSSE > 2 && n->as_Vector()->length() == 8);
+ match(Set dst (AbsVS src));
+ format %{ "pabsw $dst,$src\t# $dst = |$src| abs packed8S" %}
+ ins_encode %{
+ __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vabs16S_reg(vecY dst, vecY src) %{
+ predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+ match(Set dst (AbsVS src));
+ format %{ "vpabsw $dst,$src\t# $dst = |$src| abs packed16S" %}
+ ins_encode %{
+ int vector_len = 1;
+ __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vabs32S_reg(vecZ dst, vecZ src) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
+ match(Set dst (AbsVS src));
+ format %{ "vpabsw $dst,$src\t# $dst = |$src| abs packed32S" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vabs2I_reg(vecD dst, vecD src) %{
+ predicate(UseSSE > 2 && n->as_Vector()->length() == 2);
+ match(Set dst (AbsVI src));
+ format %{ "pabsd $dst,$src\t# $dst = |$src| abs packed2I" %}
+ ins_encode %{
+ __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vabs4I_reg(vecX dst, vecX src) %{
+ predicate(UseSSE > 2 && n->as_Vector()->length() == 4);
+ match(Set dst (AbsVI src));
+ format %{ "pabsd $dst,$src\t# $dst = |$src| abs packed4I" %}
+ ins_encode %{
+ __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vabs8I_reg(vecY dst, vecY src) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+ match(Set dst (AbsVI src));
+ format %{ "vpabsd $dst,$src\t# $dst = |$src| abs packed8I" %}
+ ins_encode %{
+ int vector_len = 1;
+ __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vabs16I_reg(vecZ dst, vecZ src) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
+ match(Set dst (AbsVI src));
+ format %{ "vpabsd $dst,$src\t# $dst = |$src| abs packed16I" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vabs2L_reg(vecX dst, vecX src) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 2);
+ match(Set dst (AbsVL src));
+ format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed2L" %}
+ ins_encode %{
+ int vector_len = 0;
+ __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vabs4L_reg(vecY dst, vecY src) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 4);
+ match(Set dst (AbsVL src));
+ format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed4L" %}
+ ins_encode %{
+ int vector_len = 1;
+ __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vabs8L_reg(vecZ dst, vecZ src) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
+ match(Set dst (AbsVL src));
+ format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed8L" %}
+ ins_encode %{
+ int vector_len = 2;
+ __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// --------------------------------- ABSNEG --------------------------------------
+
+instruct vabsneg2D(vecX dst, vecX src, rRegI scratch) %{
+ predicate(UseSSE >= 2 && n->as_Vector()->length() == 2);
+ match(Set dst (AbsVD src));
+ match(Set dst (NegVD src));
+ effect(TEMP scratch);
+ format %{ "vabsnegd $dst,$src,[mask]\t# absneg packed2D" %}
+ ins_encode %{
+ int opcode = this->as_Mach()->ideal_Opcode();
+ if ($dst$$XMMRegister != $src$$XMMRegister)
+ __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
+ __ vabsnegd(opcode, $dst$$XMMRegister, $scratch$$Register);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vabsneg4D(vecY dst, vecY src, rRegI scratch) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (AbsVD src));
+ match(Set dst (NegVD src));
+ effect(TEMP scratch);
+ format %{ "vabsnegd $dst,$src,[mask]\t# absneg packed4D" %}
+ ins_encode %{
+ int opcode = this->as_Mach()->ideal_Opcode();
+ int vector_len = 1;
+ __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len, $scratch$$Register);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vabsneg8D(vecZ dst, vecZ src, rRegI scratch) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
+ match(Set dst (AbsVD src));
+ match(Set dst (NegVD src));
+ effect(TEMP scratch);
+ format %{ "vabsnegd $dst,$src,[mask]\t# absneg packed8D" %}
+ ins_encode %{
+ int opcode = this->as_Mach()->ideal_Opcode();
+ int vector_len = 2;
+ __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len, $scratch$$Register);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vabsneg2F(vecD dst, vecD src, rRegI scratch) %{
+ predicate(UseSSE > 0 && n->as_Vector()->length() == 2);
+ match(Set dst (AbsVF src));
+ match(Set dst (NegVF src));
+ effect(TEMP scratch);
+ format %{ "vabsnegf $dst,$src,[mask]\t# absneg packed2F" %}
+ ins_encode %{
+ int opcode = this->as_Mach()->ideal_Opcode();
+ if ($dst$$XMMRegister != $src$$XMMRegister)
+ __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
+ __ vabsnegf(opcode, $dst$$XMMRegister, $scratch$$Register);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vabsneg4F(vecX dst, rRegI scratch) %{
+ predicate(UseSSE > 0 && n->as_Vector()->length() == 4);
+ match(Set dst (AbsVF dst));
+ match(Set dst (NegVF dst));
+ effect(TEMP scratch);
+ format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
+ ins_cost(150);
+ ins_encode %{
+ int opcode = this->as_Mach()->ideal_Opcode();
+ __ vabsnegf(opcode, $dst$$XMMRegister, $scratch$$Register);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vabsneg8F(vecY dst, vecY src, rRegI scratch) %{
+ predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+ match(Set dst (AbsVF src));
+ match(Set dst (NegVF src));
+ effect(TEMP scratch);
+ format %{ "vabsnegf $dst,$src,[mask]\t# absneg packed8F" %}
+ ins_cost(150);
+ ins_encode %{
+ int opcode = this->as_Mach()->ideal_Opcode();
+ int vector_len = 1;
+ __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len, $scratch$$Register);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vabsneg16F(vecZ dst, vecZ src, rRegI scratch) %{
+ predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
+ match(Set dst (AbsVF src));
+ match(Set dst (NegVF src));
+ effect(TEMP scratch);
+ format %{ "vabsnegf $dst,$src,[mask]\t# absneg packed16F" %}
+ ins_cost(150);
+ ins_encode %{
+ int opcode = this->as_Mach()->ideal_Opcode();
+ int vector_len = 2;
+ __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len, $scratch$$Register);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
// --------------------------------- FMA --------------------------------------
// a * b + c
--- a/src/hotspot/cpu/x86/x86_32.ad Tue May 07 21:53:46 2019 +0200
+++ b/src/hotspot/cpu/x86/x86_32.ad Tue May 07 13:33:27 2019 -0700
@@ -8949,6 +8949,28 @@
ins_pipe(ialu_reg_reg_alu0);
%}
+// Integer Absolute Instructions
+instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
+%{
+ match(Set dst (AbsI src));
+ effect(TEMP dst, TEMP tmp, KILL cr);
+ format %{ "movl $tmp, $src\n\t"
+ "sarl $tmp, 31\n\t"
+ "movl $dst, $src\n\t"
+ "xorl $dst, $tmp\n\t"
+ "subl $dst, $tmp\n"
+ %}
+ ins_encode %{
+ __ movl($tmp$$Register, $src$$Register);
+ __ sarl($tmp$$Register, 31);
+ __ movl($dst$$Register, $src$$Register);
+ __ xorl($dst$$Register, $tmp$$Register);
+ __ subl($dst$$Register, $tmp$$Register);
+ %}
+
+ ins_pipe(ialu_reg_reg);
+%}
+
//----------Long Instructions------------------------------------------------
// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
--- a/src/hotspot/cpu/x86/x86_64.ad Tue May 07 21:53:46 2019 +0200
+++ b/src/hotspot/cpu/x86/x86_64.ad Tue May 07 13:33:27 2019 -0700
@@ -8181,6 +8181,52 @@
ins_pipe( pipe_cmpxchg );
%}
+//----------Abs Instructions-------------------------------------------
+
+// Integer Absolute Instructions
+instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, rFlagsReg cr)
+%{
+ match(Set dst (AbsI src));
+ effect(TEMP dst, TEMP tmp, KILL cr);
+ format %{ "movl $tmp, $src\n\t"
+ "sarl $tmp, 31\n\t"
+ "movl $dst, $src\n\t"
+ "xorl $dst, $tmp\n\t"
+ "subl $dst, $tmp\n"
+ %}
+ ins_encode %{
+ __ movl($tmp$$Register, $src$$Register);
+ __ sarl($tmp$$Register, 31);
+ __ movl($dst$$Register, $src$$Register);
+ __ xorl($dst$$Register, $tmp$$Register);
+ __ subl($dst$$Register, $tmp$$Register);
+ %}
+
+ ins_pipe(ialu_reg_reg);
+%}
+
+// Long Absolute Instructions
+instruct absL_rReg(rRegL dst, rRegL src, rRegL tmp, rFlagsReg cr)
+%{
+ match(Set dst (AbsL src));
+ effect(TEMP dst, TEMP tmp, KILL cr);
+ format %{ "movq $tmp, $src\n\t"
+ "sarq $tmp, 63\n\t"
+ "movq $dst, $src\n\t"
+ "xorq $dst, $tmp\n\t"
+ "subq $dst, $tmp\n"
+ %}
+ ins_encode %{
+ __ movq($tmp$$Register, $src$$Register);
+ __ sarq($tmp$$Register, 63);
+ __ movq($dst$$Register, $src$$Register);
+ __ xorq($dst$$Register, $tmp$$Register);
+ __ subq($dst$$Register, $tmp$$Register);
+ %}
+
+ ins_pipe(ialu_reg_reg);
+%}
+
//----------Subtraction Instructions-------------------------------------------
// Integer Subtraction Instructions
--- a/src/hotspot/share/adlc/formssel.cpp Tue May 07 21:53:46 2019 +0200
+++ b/src/hotspot/share/adlc/formssel.cpp Tue May 07 13:33:27 2019 -0700
@@ -3808,7 +3808,7 @@
"MaxI","MinI","MaxF","MinF","MaxD","MinD",
"MaxV", "MinV",
"MulI","MulL","MulF","MulD",
- "MulVS","MulVI","MulVL","MulVF","MulVD",
+ "MulVB","MulVS","MulVI","MulVL","MulVF","MulVD",
"OrI","OrL",
"OrV",
"XorI","XorL",
@@ -4175,10 +4175,10 @@
static const char *vector_list[] = {
"AddVB","AddVS","AddVI","AddVL","AddVF","AddVD",
"SubVB","SubVS","SubVI","SubVL","SubVF","SubVD",
- "MulVS","MulVI","MulVL","MulVF","MulVD",
+ "MulVB","MulVS","MulVI","MulVL","MulVF","MulVD",
"CMoveVD", "CMoveVF",
"DivVF","DivVD",
- "AbsVF","AbsVD",
+ "AbsVB","AbsVS","AbsVI","AbsVL","AbsVF","AbsVD",
"NegVF","NegVD",
"SqrtVD","SqrtVF",
"AndV" ,"XorV" ,"OrV",
--- a/src/hotspot/share/classfile/vmSymbols.cpp Tue May 07 21:53:46 2019 +0200
+++ b/src/hotspot/share/classfile/vmSymbols.cpp Tue May 07 13:33:27 2019 -0700
@@ -363,6 +363,9 @@
case vmIntrinsics::_isInstance:
case vmIntrinsics::_currentThread:
case vmIntrinsics::_dabs:
+ case vmIntrinsics::_fabs:
+ case vmIntrinsics::_iabs:
+ case vmIntrinsics::_labs:
case vmIntrinsics::_dsqrt:
case vmIntrinsics::_dsin:
case vmIntrinsics::_dcos:
@@ -404,6 +407,9 @@
case vmIntrinsics::_longBitsToDouble:
case vmIntrinsics::_currentThread:
case vmIntrinsics::_dabs:
+ case vmIntrinsics::_fabs:
+ case vmIntrinsics::_iabs:
+ case vmIntrinsics::_labs:
case vmIntrinsics::_dsqrt:
case vmIntrinsics::_dsin:
case vmIntrinsics::_dcos:
@@ -567,6 +573,9 @@
case vmIntrinsics::_doubleToRawLongBits:
case vmIntrinsics::_longBitsToDouble:
case vmIntrinsics::_dabs:
+ case vmIntrinsics::_fabs:
+ case vmIntrinsics::_iabs:
+ case vmIntrinsics::_labs:
case vmIntrinsics::_dsqrt:
case vmIntrinsics::_dsin:
case vmIntrinsics::_dcos:
--- a/src/hotspot/share/classfile/vmSymbols.hpp Tue May 07 21:53:46 2019 +0200
+++ b/src/hotspot/share/classfile/vmSymbols.hpp Tue May 07 13:33:27 2019 -0700
@@ -472,6 +472,7 @@
template(float_int_signature, "(F)I") \
template(double_long_signature, "(D)J") \
template(double_double_signature, "(D)D") \
+ template(float_float_signature, "(F)F") \
template(int_float_signature, "(I)F") \
template(long_int_signature, "(J)I") \
template(long_long_signature, "(J)J") \
@@ -771,6 +772,9 @@
do_name(fma_name, "fma") \
\
do_intrinsic(_dabs, java_lang_Math, abs_name, double_double_signature, F_S) \
+ do_intrinsic(_fabs, java_lang_Math, abs_name, float_float_signature, F_S) \
+ do_intrinsic(_iabs, java_lang_Math, abs_name, int_int_signature, F_S) \
+ do_intrinsic(_labs, java_lang_Math, abs_name, long_long_signature, F_S) \
do_intrinsic(_dsin, java_lang_Math, sin_name, double_double_signature, F_S) \
do_intrinsic(_dcos, java_lang_Math, cos_name, double_double_signature, F_S) \
do_intrinsic(_dtan, java_lang_Math, tan_name, double_double_signature, F_S) \
--- a/src/hotspot/share/opto/c2compiler.cpp Tue May 07 21:53:46 2019 +0200
+++ b/src/hotspot/share/opto/c2compiler.cpp Tue May 07 13:33:27 2019 -0700
@@ -460,6 +460,9 @@
case vmIntrinsics::_dcos:
case vmIntrinsics::_dtan:
case vmIntrinsics::_dabs:
+ case vmIntrinsics::_fabs:
+ case vmIntrinsics::_iabs:
+ case vmIntrinsics::_labs:
case vmIntrinsics::_datan2:
case vmIntrinsics::_dsqrt:
case vmIntrinsics::_dexp:
--- a/src/hotspot/share/opto/classes.hpp Tue May 07 21:53:46 2019 +0200
+++ b/src/hotspot/share/opto/classes.hpp Tue May 07 13:33:27 2019 -0700
@@ -30,6 +30,7 @@
macro(AbsD)
macro(AbsF)
macro(AbsI)
+macro(AbsL)
macro(AddD)
macro(AddF)
macro(AddI)
@@ -335,6 +336,7 @@
macro(SubVL)
macro(SubVF)
macro(SubVD)
+macro(MulVB)
macro(MulVS)
macro(MulVI)
macro(MulReductionVI)
@@ -349,6 +351,10 @@
macro(FmaVF)
macro(DivVF)
macro(DivVD)
+macro(AbsVB)
+macro(AbsVS)
+macro(AbsVI)
+macro(AbsVL)
macro(AbsVF)
macro(AbsVD)
macro(NegVF)
--- a/src/hotspot/share/opto/library_call.cpp Tue May 07 21:53:46 2019 +0200
+++ b/src/hotspot/share/opto/library_call.cpp Tue May 07 13:33:27 2019 -0700
@@ -227,6 +227,7 @@
bool runtime_math(const TypeFunc* call_type, address funcAddr, const char* funcName);
bool inline_math_native(vmIntrinsics::ID id);
bool inline_math(vmIntrinsics::ID id);
+ bool inline_double_math(vmIntrinsics::ID id);
template <typename OverflowOp>
bool inline_math_overflow(Node* arg1, Node* arg2);
void inline_math_mathExact(Node* math, Node* test);
@@ -533,6 +534,9 @@
case vmIntrinsics::_dcos:
case vmIntrinsics::_dtan:
case vmIntrinsics::_dabs:
+ case vmIntrinsics::_fabs:
+ case vmIntrinsics::_iabs:
+ case vmIntrinsics::_labs:
case vmIntrinsics::_datan2:
case vmIntrinsics::_dsqrt:
case vmIntrinsics::_dexp:
@@ -1793,7 +1797,7 @@
// public static double Math.sqrt(double)
// public static double Math.log(double)
// public static double Math.log10(double)
-bool LibraryCallKit::inline_math(vmIntrinsics::ID id) {
+bool LibraryCallKit::inline_double_math(vmIntrinsics::ID id) {
Node* arg = round_double_node(argument(0));
Node* n = NULL;
switch (id) {
@@ -1805,6 +1809,23 @@
return true;
}
+//------------------------------inline_math-----------------------------------
+// public static float Math.abs(float)
+// public static int Math.abs(int)
+// public static long Math.abs(long)
+bool LibraryCallKit::inline_math(vmIntrinsics::ID id) {
+ Node* arg = argument(0);
+ Node* n = NULL;
+ switch (id) {
+ case vmIntrinsics::_fabs: n = new AbsFNode( arg); break;
+ case vmIntrinsics::_iabs: n = new AbsINode( arg); break;
+ case vmIntrinsics::_labs: n = new AbsLNode( arg); break;
+ default: fatal_unexpected_iid(id); break;
+ }
+ set_result(_gvn.transform(n));
+ return true;
+}
+
//------------------------------runtime_math-----------------------------
bool LibraryCallKit::runtime_math(const TypeFunc* call_type, address funcAddr, const char* funcName) {
assert(call_type == OptoRuntime::Math_DD_D_Type() || call_type == OptoRuntime::Math_D_D_Type(),
@@ -1855,8 +1876,11 @@
runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dlog10), "LOG10");
// These intrinsics are supported on all hardware
- case vmIntrinsics::_dsqrt: return Matcher::match_rule_supported(Op_SqrtD) ? inline_math(id) : false;
- case vmIntrinsics::_dabs: return Matcher::has_match_rule(Op_AbsD) ? inline_math(id) : false;
+ case vmIntrinsics::_dsqrt: return Matcher::match_rule_supported(Op_SqrtD) ? inline_double_math(id) : false;
+ case vmIntrinsics::_dabs: return Matcher::has_match_rule(Op_AbsD) ? inline_double_math(id) : false;
+ case vmIntrinsics::_fabs: return Matcher::match_rule_supported(Op_AbsF) ? inline_math(id) : false;
+ case vmIntrinsics::_iabs: return Matcher::match_rule_supported(Op_AbsI) ? inline_math(id) : false;
+ case vmIntrinsics::_labs: return Matcher::match_rule_supported(Op_AbsL) ? inline_math(id) : false;
case vmIntrinsics::_dexp:
return StubRoutines::dexp() != NULL ?
--- a/src/hotspot/share/opto/subnode.hpp Tue May 07 21:53:46 2019 +0200
+++ b/src/hotspot/share/opto/subnode.hpp Tue May 07 13:33:27 2019 -0700
@@ -350,6 +350,17 @@
virtual uint ideal_reg() const { return Op_RegI; }
};
+//------------------------------AbsLNode---------------------------------------
+// Absolute value a long. Since a naive graph involves control flow, we
+// "match" it in the ideal world (so the control flow can be removed).
+class AbsLNode : public AbsNode {
+public:
+ AbsLNode( Node *in1 ) : AbsNode(in1) {}
+ virtual int Opcode() const;
+ const Type *bottom_type() const { return TypeLong::LONG; }
+ virtual uint ideal_reg() const { return Op_RegL; }
+};
+
//------------------------------AbsFNode---------------------------------------
// Absolute value a float, a common float-point idiom with a cheap hardware
// implemention on most chips. Since a naive graph involves control flow, we
--- a/src/hotspot/share/opto/superword.cpp Tue May 07 21:53:46 2019 +0200
+++ b/src/hotspot/share/opto/superword.cpp Tue May 07 13:33:27 2019 -0700
@@ -2453,6 +2453,7 @@
}
} else if (opc == Op_SqrtF || opc == Op_SqrtD ||
opc == Op_AbsF || opc == Op_AbsD ||
+ opc == Op_AbsI || opc == Op_AbsL ||
opc == Op_NegF || opc == Op_NegD ||
opc == Op_PopCountI) {
assert(n->req() == 2, "only one input expected");
--- a/src/hotspot/share/opto/vectornode.cpp Tue May 07 21:53:46 2019 +0200
+++ b/src/hotspot/share/opto/vectornode.cpp Tue May 07 13:33:27 2019 -0700
@@ -70,8 +70,8 @@
return Op_SubVD;
case Op_MulI:
switch (bt) {
- case T_BOOLEAN:
- case T_BYTE: return 0; // Unimplemented
+ case T_BOOLEAN:return 0;
+ case T_BYTE: return Op_MulVB;
case T_CHAR:
case T_SHORT: return Op_MulVS;
case T_INT: return Op_MulVI;
@@ -104,6 +104,18 @@
case Op_DivD:
assert(bt == T_DOUBLE, "must be");
return Op_DivVD;
+ case Op_AbsI:
+ switch (bt) {
+ case T_BOOLEAN:
+ case T_CHAR: return 0; // abs does not make sense for unsigned
+ case T_BYTE: return Op_AbsVB;
+ case T_SHORT: return Op_AbsVS;
+ case T_INT: return Op_AbsVI;
+ default: ShouldNotReachHere(); return 0;
+ }
+ case Op_AbsL:
+ assert(bt == T_LONG, "must be");
+ return Op_AbsVL;
case Op_AbsF:
assert(bt == T_FLOAT, "must be");
return Op_AbsVF;
@@ -350,6 +362,7 @@
case Op_SubVF: return new SubVFNode(n1, n2, vt);
case Op_SubVD: return new SubVDNode(n1, n2, vt);
+ case Op_MulVB: return new MulVBNode(n1, n2, vt);
case Op_MulVS: return new MulVSNode(n1, n2, vt);
case Op_MulVI: return new MulVINode(n1, n2, vt);
case Op_MulVL: return new MulVLNode(n1, n2, vt);
@@ -359,6 +372,10 @@
case Op_DivVF: return new DivVFNode(n1, n2, vt);
case Op_DivVD: return new DivVDNode(n1, n2, vt);
+ case Op_AbsVB: return new AbsVBNode(n1, vt);
+ case Op_AbsVS: return new AbsVSNode(n1, vt);
+ case Op_AbsVI: return new AbsVINode(n1, vt);
+ case Op_AbsVL: return new AbsVLNode(n1, vt);
case Op_AbsVF: return new AbsVFNode(n1, vt);
case Op_AbsVD: return new AbsVDNode(n1, vt);
--- a/src/hotspot/share/opto/vectornode.hpp Tue May 07 21:53:46 2019 +0200
+++ b/src/hotspot/share/opto/vectornode.hpp Tue May 07 13:33:27 2019 -0700
@@ -224,6 +224,14 @@
virtual int Opcode() const;
};
+//------------------------------MulVBNode--------------------------------------
+// Vector multiply byte
+class MulVBNode : public VectorNode {
+ public:
+ MulVBNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1, in2, vt) {}
+ virtual int Opcode() const;
+};
+
//------------------------------MulVSNode--------------------------------------
// Vector multiply short
class MulVSNode : public VectorNode {
@@ -360,6 +368,38 @@
virtual int Opcode() const;
};
+//------------------------------AbsVBNode--------------------------------------
+// Vector Abs byte
+class AbsVBNode : public VectorNode {
+public:
+ AbsVBNode(Node* in, const TypeVect* vt) : VectorNode(in, vt) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------AbsVSNode--------------------------------------
+// Vector Abs short
+class AbsVSNode : public VectorNode {
+public:
+ AbsVSNode(Node* in, const TypeVect* vt) : VectorNode(in, vt) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------AbsVINode--------------------------------------
+// Vector Abs int
+class AbsVINode : public VectorNode {
+public:
+ AbsVINode(Node* in, const TypeVect* vt) : VectorNode(in, vt) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------AbsVLNode--------------------------------------
+// Vector Abs long
+class AbsVLNode : public VectorNode {
+public:
+ AbsVLNode(Node* in, const TypeVect* vt) : VectorNode(in, vt) {}
+ virtual int Opcode() const;
+};
+
//------------------------------AbsVFNode--------------------------------------
// Vector Abs float
class AbsVFNode : public VectorNode {
--- a/src/hotspot/share/runtime/vmStructs.cpp Tue May 07 21:53:46 2019 +0200
+++ b/src/hotspot/share/runtime/vmStructs.cpp Tue May 07 13:33:27 2019 -0700
@@ -1758,6 +1758,10 @@
declare_c2_type(ReverseBytesLNode, Node) \
declare_c2_type(ReductionNode, Node) \
declare_c2_type(VectorNode, Node) \
+ declare_c2_type(AbsVBNode, VectorNode) \
+ declare_c2_type(AbsVSNode, VectorNode) \
+ declare_c2_type(AbsVINode, VectorNode) \
+ declare_c2_type(AbsVLNode, VectorNode) \
declare_c2_type(AddVBNode, VectorNode) \
declare_c2_type(AddVSNode, VectorNode) \
declare_c2_type(AddVINode, VectorNode) \
@@ -1774,6 +1778,7 @@
declare_c2_type(SubVLNode, VectorNode) \
declare_c2_type(SubVFNode, VectorNode) \
declare_c2_type(SubVDNode, VectorNode) \
+ declare_c2_type(MulVBNode, VectorNode) \
declare_c2_type(MulVSNode, VectorNode) \
declare_c2_type(MulVLNode, VectorNode) \
declare_c2_type(MulReductionVLNode, ReductionNode) \
@@ -1782,6 +1787,8 @@
declare_c2_type(MulVFNode, VectorNode) \
declare_c2_type(MulReductionVFNode, ReductionNode) \
declare_c2_type(MulVDNode, VectorNode) \
+ declare_c2_type(NegVFNode, VectorNode) \
+ declare_c2_type(NegVDNode, VectorNode) \
declare_c2_type(FmaVDNode, VectorNode) \
declare_c2_type(FmaVFNode, VectorNode) \
declare_c2_type(CMoveVFNode, VectorNode) \
--- a/src/java.base/share/classes/java/lang/Math.java Tue May 07 21:53:46 2019 +0200
+++ b/src/java.base/share/classes/java/lang/Math.java Tue May 07 13:33:27 2019 -0700
@@ -1353,6 +1353,7 @@
* @param a the argument whose absolute value is to be determined
* @return the absolute value of the argument.
*/
+ @HotSpotIntrinsicCandidate
public static int abs(int a) {
return (a < 0) ? -a : a;
}
@@ -1370,6 +1371,7 @@
* @param a the argument whose absolute value is to be determined
* @return the absolute value of the argument.
*/
+ @HotSpotIntrinsicCandidate
public static long abs(long a) {
return (a < 0) ? -a : a;
}
@@ -1394,6 +1396,7 @@
* @param a the argument whose absolute value is to be determined
* @return the absolute value of the argument.
*/
+ @HotSpotIntrinsicCandidate
public static float abs(float a) {
return (a <= 0.0F) ? 0.0F - a : a;
}
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.test/src/org/graalvm/compiler/hotspot/test/CheckGraalIntrinsics.java Tue May 07 21:53:46 2019 +0200
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.test/src/org/graalvm/compiler/hotspot/test/CheckGraalIntrinsics.java Tue May 07 13:33:27 2019 -0700
@@ -398,6 +398,9 @@
if (isJDK13OrHigher()) {
add(toBeInvestigated,
+ "java/lang/Math.abs(F)F",
+ "java/lang/Math.abs(I)I",
+ "java/lang/Math.abs(J)J",
"java/lang/Math.max(DD)D",
"java/lang/Math.max(FF)F",
"java/lang/Math.min(DD)D",
--- a/test/hotspot/jtreg/compiler/c2/cr6340864/TestDoubleVect.java Tue May 07 21:53:46 2019 +0200
+++ b/test/hotspot/jtreg/compiler/c2/cr6340864/TestDoubleVect.java Tue May 07 13:33:27 2019 -0700
@@ -86,6 +86,7 @@
test_divc_n(a0, a1);
test_divv(a0, a1, -VALUE);
test_diva(a0, a1, a3);
+ test_negc(a0, a1);
}
// Test and verify results
System.out.println("Verification");
@@ -339,6 +340,16 @@
for (int i=12; i<ARRLEN; i++) {
errn += verify("test_diva_n: ", i, a0[i], ((ADD_INIT+i)/(-VALUE)));
}
+ test_negc(a0, a1);
+ errn += verify("test_negc: ", 0, a0[0], (Double.NaN));
+ errn += verify("test_negc: ", 1, a0[1], (Double.NEGATIVE_INFINITY));
+ errn += verify("test_negc: ", 2, a0[2], (Double.POSITIVE_INFINITY));
+ errn += verify("test_negc: ", 3, a0[3], (double)(-Double.MAX_VALUE));
+ errn += verify("test_negc: ", 4, a0[4], (double)(-Double.MIN_VALUE));
+ errn += verify("test_negc: ", 5, a0[5], (double)(-Double.MIN_NORMAL));
+ for (int i=6; i<ARRLEN; i++) {
+ errn += verify("test_negc: ", i, a0[i], (double)(-((double)(ADD_INIT+i))));
+ }
}
@@ -469,6 +480,13 @@
end = System.currentTimeMillis();
System.out.println("test_diva_n: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_negc(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_negc_n: " + (end - start));
+
return errn;
}
@@ -553,6 +571,11 @@
a0[i] = (a1[i]/a2[i]);
}
}
+ static void test_negc(double[] a0, double[] a1) {
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (double)(-((double)a1[i]));
+ }
+ }
static int verify(String text, int i, double elem, double val) {
if (elem != val && !(Double.isNaN(elem) && Double.isNaN(val))) {
--- a/test/hotspot/jtreg/compiler/c2/cr6340864/TestFloatVect.java Tue May 07 21:53:46 2019 +0200
+++ b/test/hotspot/jtreg/compiler/c2/cr6340864/TestFloatVect.java Tue May 07 13:33:27 2019 -0700
@@ -86,6 +86,7 @@
test_divc_n(a0, a1);
test_divv(a0, a1, -VALUE);
test_diva(a0, a1, a3);
+ test_negc(a0, a1);
}
// Test and verify results
System.out.println("Verification");
@@ -340,6 +341,17 @@
errn += verify("test_diva_n: ", i, a0[i], ((ADD_INIT+i)/(-VALUE)));
}
+ test_negc(a0, a1);
+ errn += verify("test_negc: ", 0, a0[0], (Float.NaN));
+ errn += verify("test_negc: ", 1, a0[1], (Float.NEGATIVE_INFINITY));
+ errn += verify("test_negc: ", 2, a0[2], (Float.POSITIVE_INFINITY));
+ errn += verify("test_negc: ", 3, a0[3], (float)(-Float.MAX_VALUE));
+ errn += verify("test_negc: ", 4, a0[4], (float)(-Float.MIN_VALUE));
+ errn += verify("test_negc: ", 5, a0[5], (float)(-Float.MIN_NORMAL));
+ for (int i=6; i<ARRLEN; i++) {
+ errn += verify("test_negc: ", i, a0[i], (float)(-((float)(ADD_INIT+i))));
+ }
+
}
if (errn > 0)
@@ -469,6 +481,13 @@
end = System.currentTimeMillis();
System.out.println("test_diva_n: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_negc(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_negc_n: " + (end - start));
+
return errn;
}
@@ -554,6 +573,12 @@
}
}
+ static void test_negc(float[] a0, float[] a1) {
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (float)(-((float)a1[i]));
+ }
+ }
+
static int verify(String text, int i, float elem, float val) {
if (elem != val && !(Float.isNaN(elem) && Float.isNaN(val))) {
System.err.println(text + "[" + i + "] = " + elem + " != " + val);
--- a/test/hotspot/jtreg/compiler/c2/cr6340864/TestIntVect.java Tue May 07 21:53:46 2019 +0200
+++ b/test/hotspot/jtreg/compiler/c2/cr6340864/TestIntVect.java Tue May 07 13:33:27 2019 -0700
@@ -102,6 +102,10 @@
test_xorv(a0, a1, (int)BIT_MASK);
test_xora(a0, a1, a4);
+ test_absc(a0, a1);
+ test_negc(a0, a1);
+ test_notc(a0, a1);
+
test_sllc(a0, a1);
test_sllv(a0, a1, VALUE);
test_srlc(a0, a1);
@@ -276,6 +280,21 @@
errn += verify("test_xora: ", i, a0[i], (int)((int)(ADD_INIT+i)^BIT_MASK));
}
+ test_absc(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_absc: ", i, a0[i], (int)(Math.abs((int)(ADD_INIT+i))));
+ }
+
+ test_negc(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_negc: ", i, a0[i], (int)(-(int)(ADD_INIT+i)));
+ }
+
+ test_notc(a0, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_notc: ", i, a0[i], (int)(~(int)(ADD_INIT+i)));
+ }
+
test_sllc(a0, a1);
for (int i=0; i<ARRLEN; i++) {
errn += verify("test_sllc: ", i, a0[i], (int)((int)(ADD_INIT+i)<<VALUE));
@@ -650,6 +669,27 @@
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
+ test_absc(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_absc: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_negc(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_negc: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_notc(a0, a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_notc: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
test_sllc(a0, a1);
}
end = System.currentTimeMillis();
@@ -1040,6 +1080,24 @@
}
}
+ static void test_absc(int[] a0, int[] a1) {
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (int)(Math.abs(a1[i]));
+ }
+ }
+
+ static void test_negc(int[] a0, int[] a1) {
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (int)(-a1[i]);
+ }
+ }
+
+ static void test_notc(int[] a0, int[] a1) {
+ for (int i = 0; i < a0.length; i+=1) {
+ a0[i] = (int)(~a1[i]);
+ }
+ }
+
static void test_sllc(int[] a0, int[] a1) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (int)(a1[i]<<VALUE);