--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Thu May 11 13:11:42 2017 +0100
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Tue May 16 12:55:54 2017 -0700
@@ -4134,28 +4134,33 @@
if ((dst_enc < 16) && (nds_enc < 16)) {
vandps(dst, nds, negate_field, vector_len);
} else if ((src_enc < 16) && (dst_enc < 16)) {
- movss(src, nds);
+ evmovdqul(src, nds, Assembler::AVX_512bit);
vandps(dst, src, negate_field, vector_len);
} else if (src_enc < 16) {
- movss(src, nds);
+ evmovdqul(src, nds, Assembler::AVX_512bit);
vandps(src, src, negate_field, vector_len);
- movss(dst, src);
+ evmovdqul(dst, src, Assembler::AVX_512bit);
} else if (dst_enc < 16) {
- movdqu(src, xmm0);
- movss(xmm0, nds);
+ evmovdqul(src, xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, nds, Assembler::AVX_512bit);
vandps(dst, xmm0, negate_field, vector_len);
- movdqu(xmm0, src);
- } else if (nds_enc < 16) {
- movdqu(src, xmm0);
- vandps(xmm0, nds, negate_field, vector_len);
- movss(dst, xmm0);
- movdqu(xmm0, src);
- } else {
- movdqu(src, xmm0);
- movss(xmm0, nds);
- vandps(xmm0, xmm0, negate_field, vector_len);
- movss(dst, xmm0);
- movdqu(xmm0, src);
+ evmovdqul(xmm0, src, Assembler::AVX_512bit);
+ } else {
+ if (src_enc != dst_enc) {
+ evmovdqul(src, xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+ vandps(xmm0, xmm0, negate_field, vector_len);
+ evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, src, Assembler::AVX_512bit);
+ } else {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+ vandps(xmm0, xmm0, negate_field, vector_len);
+ evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
}
}
@@ -4166,28 +4171,33 @@
if ((dst_enc < 16) && (nds_enc < 16)) {
vandpd(dst, nds, negate_field, vector_len);
} else if ((src_enc < 16) && (dst_enc < 16)) {
- movsd(src, nds);
+ evmovdqul(src, nds, Assembler::AVX_512bit);
vandpd(dst, src, negate_field, vector_len);
} else if (src_enc < 16) {
- movsd(src, nds);
+ evmovdqul(src, nds, Assembler::AVX_512bit);
vandpd(src, src, negate_field, vector_len);
- movsd(dst, src);
+ evmovdqul(dst, src, Assembler::AVX_512bit);
} else if (dst_enc < 16) {
- movdqu(src, xmm0);
- movsd(xmm0, nds);
+ evmovdqul(src, xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, nds, Assembler::AVX_512bit);
vandpd(dst, xmm0, negate_field, vector_len);
- movdqu(xmm0, src);
- } else if (nds_enc < 16) {
- movdqu(src, xmm0);
- vandpd(xmm0, nds, negate_field, vector_len);
- movsd(dst, xmm0);
- movdqu(xmm0, src);
- } else {
- movdqu(src, xmm0);
- movsd(xmm0, nds);
- vandpd(xmm0, xmm0, negate_field, vector_len);
- movsd(dst, xmm0);
- movdqu(xmm0, src);
+ evmovdqul(xmm0, src, Assembler::AVX_512bit);
+ } else {
+ if (src_enc != dst_enc) {
+ evmovdqul(src, xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+ vandpd(xmm0, xmm0, negate_field, vector_len);
+ evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, src, Assembler::AVX_512bit);
+ } else {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+ vandpd(xmm0, xmm0, negate_field, vector_len);
+ evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
}
}
@@ -4934,6 +4944,24 @@
}
}
+void MacroAssembler::pshufd(XMMRegister dst, Address src, int mode) {
+ if (VM_Version::supports_avx512vl()) {
+ Assembler::pshufd(dst, src, mode);
+ } else {
+ int dst_enc = dst->encoding();
+ if (dst_enc < 16) {
+ Assembler::pshufd(dst, src, mode);
+ } else {
+ subptr(rsp, 64);
+ evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+ Assembler::pshufd(xmm0, src, mode);
+ evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+ evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+ addptr(rsp, 64);
+ }
+ }
+}
+
// This instruction exists within macros, ergo we cannot control its input
// when emitted through those patterns.
void MacroAssembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {