--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp Tue Sep 04 18:32:28 2018 +0100
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp Tue Sep 04 12:44:02 2018 -0700
@@ -4166,19 +4166,22 @@
if ((dst_enc < 16) && (nds_enc < 16)) {
vandps(dst, nds, negate_field, vector_len);
} else if ((src_enc < 16) && (dst_enc < 16)) {
+ // Use src as scratch register
evmovdqul(src, nds, Assembler::AVX_512bit);
vandps(dst, src, negate_field, vector_len);
+ } else if (dst_enc < 16) {
+ evmovdqul(dst, nds, Assembler::AVX_512bit);
+ vandps(dst, dst, negate_field, vector_len);
+ } else if (nds_enc < 16) {
+ vandps(nds, nds, negate_field, vector_len);
+ evmovdqul(dst, nds, Assembler::AVX_512bit);
} else if (src_enc < 16) {
evmovdqul(src, nds, Assembler::AVX_512bit);
vandps(src, src, negate_field, vector_len);
evmovdqul(dst, src, Assembler::AVX_512bit);
- } else if (dst_enc < 16) {
- evmovdqul(src, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, nds, Assembler::AVX_512bit);
- vandps(dst, xmm0, negate_field, vector_len);
- evmovdqul(xmm0, src, Assembler::AVX_512bit);
} else {
if (src_enc != dst_enc) {
+ // Use src as scratch register
evmovdqul(src, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
vandps(xmm0, xmm0, negate_field, vector_len);
@@ -4201,17 +4204,19 @@
if ((dst_enc < 16) && (nds_enc < 16)) {
vandpd(dst, nds, negate_field, vector_len);
} else if ((src_enc < 16) && (dst_enc < 16)) {
+ // Use src as scratch register
evmovdqul(src, nds, Assembler::AVX_512bit);
vandpd(dst, src, negate_field, vector_len);
+ } else if (dst_enc < 16) {
+ evmovdqul(dst, nds, Assembler::AVX_512bit);
+ vandpd(dst, dst, negate_field, vector_len);
+ } else if (nds_enc < 16) {
+ vandpd(nds, nds, negate_field, vector_len);
+ evmovdqul(dst, nds, Assembler::AVX_512bit);
} else if (src_enc < 16) {
evmovdqul(src, nds, Assembler::AVX_512bit);
vandpd(src, src, negate_field, vector_len);
evmovdqul(dst, src, Assembler::AVX_512bit);
- } else if (dst_enc < 16) {
- evmovdqul(src, xmm0, Assembler::AVX_512bit);
- evmovdqul(xmm0, nds, Assembler::AVX_512bit);
- vandpd(dst, xmm0, negate_field, vector_len);
- evmovdqul(xmm0, src, Assembler::AVX_512bit);
} else {
if (src_enc != dst_enc) {
evmovdqul(src, xmm0, Assembler::AVX_512bit);
@@ -4282,6 +4287,7 @@
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, dst, Assembler::AVX_512bit);
Assembler::vpaddb(xmm0, xmm0, src, vector_len);
+ evmovdqul(dst, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
}
}
@@ -4330,7 +4336,7 @@
} else if (dst_enc < 16) {
Assembler::vpaddw(dst, dst, src, vector_len);
} else if (nds_enc < 16) {
- // implies dst_enc in upper bank with src as scratch
+ // implies dst_enc in upper bank with nds as scratch
evmovdqul(nds, dst, Assembler::AVX_512bit);
Assembler::vpaddw(nds, nds, src, vector_len);
evmovdqul(dst, nds, Assembler::AVX_512bit);
@@ -4339,6 +4345,7 @@
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, dst, Assembler::AVX_512bit);
Assembler::vpaddw(xmm0, xmm0, src, vector_len);
+ evmovdqul(dst, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
}
}
@@ -4522,6 +4529,7 @@
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, dst, Assembler::AVX_512bit);
Assembler::vpmullw(xmm0, xmm0, src, vector_len);
+ evmovdqul(dst, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
}
}
@@ -4578,7 +4586,8 @@
// worse case scenario, all regs in upper bank
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::vpsubw(xmm0, xmm0, src, vector_len);
+ Assembler::vpsubb(xmm0, xmm0, src, vector_len);
+ evmovdqul(dst, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
}
}
@@ -4636,6 +4645,7 @@
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, dst, Assembler::AVX_512bit);
Assembler::vpsubw(xmm0, xmm0, src, vector_len);
+ evmovdqul(dst, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
}
}
@@ -4649,7 +4659,7 @@
} else if ((dst_enc < 16) && (shift_enc < 16)) {
Assembler::vpsraw(dst, dst, shift, vector_len);
} else if ((dst_enc < 16) && (nds_enc < 16)) {
- // use nds_enc as scratch with shift
+ // use nds as scratch with shift
evmovdqul(nds, shift, Assembler::AVX_512bit);
Assembler::vpsraw(dst, dst, nds, vector_len);
} else if ((shift_enc < 16) && (nds_enc < 16)) {
@@ -4664,7 +4674,7 @@
Assembler::vpsraw(dst, dst, xmm0, vector_len);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
} else if (nds_enc < 16) {
- // use nds as dest as temps
+ // use nds and dst as temps
evmovdqul(nds, dst, Assembler::AVX_512bit);
evmovdqul(dst, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, shift, Assembler::AVX_512bit);
@@ -4677,8 +4687,7 @@
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm1, shift, Assembler::AVX_512bit);
evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::vpsllw(xmm0, xmm0, xmm1, vector_len);
- evmovdqul(xmm1, dst, Assembler::AVX_512bit);
+ Assembler::vpsraw(xmm0, xmm0, xmm1, vector_len);
evmovdqul(dst, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
pop_zmm(xmm1);
@@ -4702,6 +4711,7 @@
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, dst, Assembler::AVX_512bit);
Assembler::vpsraw(xmm0, xmm0, shift, vector_len);
+ evmovdqul(dst, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
}
}
@@ -4715,7 +4725,7 @@
} else if ((dst_enc < 16) && (shift_enc < 16)) {
Assembler::vpsrlw(dst, dst, shift, vector_len);
} else if ((dst_enc < 16) && (nds_enc < 16)) {
- // use nds_enc as scratch with shift
+ // use nds as scratch with shift
evmovdqul(nds, shift, Assembler::AVX_512bit);
Assembler::vpsrlw(dst, dst, nds, vector_len);
} else if ((shift_enc < 16) && (nds_enc < 16)) {
@@ -4730,7 +4740,7 @@
Assembler::vpsrlw(dst, dst, xmm0, vector_len);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
} else if (nds_enc < 16) {
- // use nds as dest as temps
+ // use nds and dst as temps
evmovdqul(nds, dst, Assembler::AVX_512bit);
evmovdqul(dst, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, shift, Assembler::AVX_512bit);
@@ -4743,8 +4753,7 @@
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm1, shift, Assembler::AVX_512bit);
evmovdqul(xmm0, dst, Assembler::AVX_512bit);
- Assembler::vpsllw(xmm0, xmm0, xmm1, vector_len);
- evmovdqul(xmm1, dst, Assembler::AVX_512bit);
+ Assembler::vpsrlw(xmm0, xmm0, xmm1, vector_len);
evmovdqul(dst, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
pop_zmm(xmm1);
@@ -4768,6 +4777,7 @@
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, dst, Assembler::AVX_512bit);
Assembler::vpsrlw(xmm0, xmm0, shift, vector_len);
+ evmovdqul(dst, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
}
}
@@ -4781,7 +4791,7 @@
} else if ((dst_enc < 16) && (shift_enc < 16)) {
Assembler::vpsllw(dst, dst, shift, vector_len);
} else if ((dst_enc < 16) && (nds_enc < 16)) {
- // use nds_enc as scratch with shift
+ // use nds as scratch with shift
evmovdqul(nds, shift, Assembler::AVX_512bit);
Assembler::vpsllw(dst, dst, nds, vector_len);
} else if ((shift_enc < 16) && (nds_enc < 16)) {
@@ -4796,7 +4806,7 @@
Assembler::vpsllw(dst, dst, xmm0, vector_len);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
} else if (nds_enc < 16) {
- // use nds as dest as temps
+ // use nds and dst as temps
evmovdqul(nds, dst, Assembler::AVX_512bit);
evmovdqul(dst, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, shift, Assembler::AVX_512bit);
@@ -4810,7 +4820,6 @@
evmovdqul(xmm1, shift, Assembler::AVX_512bit);
evmovdqul(xmm0, dst, Assembler::AVX_512bit);
Assembler::vpsllw(xmm0, xmm0, xmm1, vector_len);
- evmovdqul(xmm1, dst, Assembler::AVX_512bit);
evmovdqul(dst, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
pop_zmm(xmm1);
@@ -4834,6 +4843,7 @@
evmovdqul(nds, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, dst, Assembler::AVX_512bit);
Assembler::vpsllw(xmm0, xmm0, shift, vector_len);
+ evmovdqul(dst, xmm0, Assembler::AVX_512bit);
evmovdqul(xmm0, nds, Assembler::AVX_512bit);
}
}
@@ -7130,7 +7140,7 @@
bind(RET_NOT_FOUND);
movl(result, -1);
- jmpb(CLEANUP);
+ jmp(CLEANUP);
bind(FOUND_SUBSTR);
// Compute start addr of substr
@@ -7148,7 +7158,7 @@
addl(tmp, cnt2);
// Found result if we matched whole substring.
cmpl(tmp, stride);
- jccb(Assembler::lessEqual, RET_FOUND);
+ jcc(Assembler::lessEqual, RET_FOUND);
// Repeat search for small substring (<= 8 chars)
// from new point 'str1' without reloading substring.
@@ -7248,7 +7258,7 @@
jcc(Assembler::carryClear, FOUND_CHAR);
addptr(result, 32);
subl(tmp, 2*stride);
- jccb(Assembler::notZero, SCAN_TO_16_CHAR_LOOP);
+ jcc(Assembler::notZero, SCAN_TO_16_CHAR_LOOP);
jmp(SCAN_TO_8_CHAR);
bind(SCAN_TO_8_CHAR_INIT);
movdl(vec1, ch);
@@ -7278,7 +7288,7 @@
jcc(Assembler::carryClear, FOUND_CHAR);
addptr(result, 16);
subl(tmp, stride);
- jccb(Assembler::notZero, SCAN_TO_8_CHAR_LOOP);
+ jcc(Assembler::notZero, SCAN_TO_8_CHAR_LOOP);
bind(SCAN_TO_CHAR);
testl(cnt1, cnt1);
jcc(Assembler::zero, RET_NOT_FOUND);
@@ -7857,7 +7867,7 @@
// Compare 16-byte vectors
andl(result, 0x0000000f); // tail count (in bytes)
andl(len, 0xfffffff0); // vector count (in bytes)
- jccb(Assembler::zero, COMPARE_TAIL);
+ jcc(Assembler::zero, COMPARE_TAIL);
lea(ary1, Address(ary1, len, Address::times_1));
negptr(len);
@@ -7869,12 +7879,12 @@
bind(COMPARE_WIDE_VECTORS);
movdqu(vec1, Address(ary1, len, Address::times_1));
ptest(vec1, vec2);
- jccb(Assembler::notZero, TRUE_LABEL);
+ jcc(Assembler::notZero, TRUE_LABEL);
addptr(len, 16);
jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
testl(result, result);
- jccb(Assembler::zero, FALSE_LABEL);
+ jcc(Assembler::zero, FALSE_LABEL);
movdqu(vec1, Address(ary1, result, Address::times_1, -16));
ptest(vec1, vec2);
@@ -9069,7 +9079,7 @@
jcc(Assembler::notZero, VECTOR32_NOT_EQUAL);//mismatch found
addq(result, 32);
subq(length, 32);
- jccb(Assembler::greaterEqual, VECTOR32_LOOP);
+ jcc(Assembler::greaterEqual, VECTOR32_LOOP);
addq(length, 32);
jcc(Assembler::equal, SAME_TILL_END);
//falling through if less than 32 bytes left //close the branch here.
@@ -9140,24 +9150,24 @@
load_unsigned_byte(tmp2, Address(objb, result));
xorl(tmp1, tmp2);
testl(tmp1, tmp1);
- jccb(Assembler::notZero, BYTES_NOT_EQUAL);//mismatch found
+ jcc(Assembler::notZero, BYTES_NOT_EQUAL);//mismatch found
decq(length);
- jccb(Assembler::zero, SAME_TILL_END);
+ jcc(Assembler::zero, SAME_TILL_END);
incq(result);
load_unsigned_byte(tmp1, Address(obja, result));
load_unsigned_byte(tmp2, Address(objb, result));
xorl(tmp1, tmp2);
testl(tmp1, tmp1);
- jccb(Assembler::notZero, BYTES_NOT_EQUAL);//mismatch found
+ jcc(Assembler::notZero, BYTES_NOT_EQUAL);//mismatch found
decq(length);
- jccb(Assembler::zero, SAME_TILL_END);
+ jcc(Assembler::zero, SAME_TILL_END);
incq(result);
load_unsigned_byte(tmp1, Address(obja, result));
load_unsigned_byte(tmp2, Address(objb, result));
xorl(tmp1, tmp2);
testl(tmp1, tmp1);
- jccb(Assembler::notZero, BYTES_NOT_EQUAL);//mismatch found
- jmpb(SAME_TILL_END);
+ jcc(Assembler::notZero, BYTES_NOT_EQUAL);//mismatch found
+ jmp(SAME_TILL_END);
if (UseAVX >= 2) {
bind(VECTOR32_NOT_EQUAL);
@@ -9168,7 +9178,7 @@
bsfq(tmp1, tmp1);
addq(result, tmp1);
shrq(result);
- jmpb(DONE);
+ jmp(DONE);
}
bind(VECTOR16_NOT_EQUAL);
@@ -10590,7 +10600,7 @@
andl(len, 0xfffffff0); // vector count (in chars)
andl(result, 0x0000000f); // tail count (in chars)
testl(len, len);
- jccb(Assembler::zero, copy_16);
+ jcc(Assembler::zero, copy_16);
// compress 16 chars per iter
movdl(tmp1Reg, tmp5);