--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp Mon Apr 25 15:14:02 2016 +0200
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp Wed Apr 27 13:37:07 2016 -0700
@@ -2323,6 +2323,15 @@
emit_int8((unsigned char)(0xC0 | encode));
}
+// Emits the 64-bit mask test (0F 99 with W = 1) of src1 against src2; the result is reported only through ZF and CF. NOTE(review): per the Intel SDM entry for KTEST, ZF is set when (src1 AND src2) is zero and CF when (NOT src1 AND src2) is zero - confirm against the manual.
+void Assembler::ktestql(KRegister src1, KRegister src2) {
+ assert(VM_Version::supports_avx512bw(), ""); // guarded on the CPU reporting AVX512BW
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); // rex_w = true selects the W = 1 form; NOTE(review): legacy_mode = true presumably requests a VEX rather than EVEX prefix - confirm
+ int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); // no SIMD prefix, 0F opcode map; the returned value is OR-ed into the ModRM byte below
+ emit_int8((unsigned char)0x99); // opcode byte
+ emit_int8((unsigned char)(0xC0 | encode)); // ModRM byte with the top two bits set (register-direct form)
+}
+
void Assembler::movb(Address dst, int imm8) {
InstructionMark im(this);
prefix(dst);
@@ -2491,6 +2500,19 @@
emit_operand(src, dst);
}
+void Assembler::evmovdqub(KRegister mask, XMMRegister dst, Address src, int vector_len) { // Opmask-carrying overload of the memory-to-register evmovdqub: emits F2 0F 6F with 'mask' as the embedded opmask register.
+ assert(VM_Version::supports_avx512vlbw(), ""); // guarded on VM_Version::supports_avx512vlbw()
+ assert(is_vector_masking(), ""); // For stub code use only: the caller must have called set_vector_masking() beforehand
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); // no_mask_reg = false so the prefix emitter encodes the opmask specifier set below
+ attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+ attributes.set_embedded_opmask_register_specifier(mask); // replaces the constructor's default specifier (k1) with the caller's mask
+ attributes.set_is_evex_instruction(); // NOTE(review): presumably forces the EVEX form regardless of register numbers - confirm
+ vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); // second argument 0: no extra source register (nds) for this move
+ emit_int8(0x6F); // opcode byte
+ emit_operand(dst, src); // operand bytes for dst and the memory address src
+}
+
void Assembler::evmovdquw(XMMRegister dst, XMMRegister src, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
@@ -3285,6 +3307,19 @@
emit_operand(as_Register(dst_enc), src);
}
+void Assembler::evpcmpeqb(KRegister mask, KRegister kdst, XMMRegister nds, Address src, int vector_len) { // Opmask-carrying overload of the register/memory evpcmpeqb: emits 66 0F 74 with kdst as the destination and 'mask' as the embedded opmask register.
+ assert(VM_Version::supports_avx512vlbw(), ""); // guarded on VM_Version::supports_avx512vlbw()
+ assert(is_vector_masking(), ""); // For stub code use only: the caller must have called set_vector_masking() beforehand
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_reg_mask */ false, /* uses_vl */ false); // mask flag is false so the prefix emitter encodes the opmask specifier set below
+ attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+ attributes.set_embedded_opmask_register_specifier(mask); // replaces the constructor's default specifier (k1) with the caller's mask
+ attributes.set_is_evex_instruction(); // NOTE(review): presumably forces the EVEX form - confirm
+ vex_prefix(src, nds->encoding(), kdst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); // nds is the register source, kdst the destination
+ emit_int8(0x74); // opcode byte
+ emit_operand(as_Register(kdst->encoding()), src); // kdst's encoding number is passed through as_Register() to reach the Register form of emit_operand
+}
+
// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
void Assembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_sse2(), "");
@@ -6938,7 +6973,7 @@
emit_int8(byte3);
// P2: byte 4 as zL'Lbv'aaa
- int byte4 = (_attributes->is_no_reg_mask()) ? 0 : 1; // kregs are implemented in the low 3 bits as aaa (hard code k1, it will be initialized for now)
+ int byte4 = (_attributes->is_no_reg_mask()) ? 0 : _attributes->get_embedded_opmask_register_specifier(); // kregs are implemented in the low 3 bits as aaa (taken from the instruction attributes, which default to k1)
// EVEX.v` for extending EVEX.vvvv or VIDX
byte4 |= (evex_v ? 0: EVEX_V);
// third EXEC.b for broadcast actions
--- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp Mon Apr 25 15:14:02 2016 +0200
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp Wed Apr 27 13:37:07 2016 -0700
@@ -606,6 +606,7 @@
bool _legacy_mode_vl;
bool _legacy_mode_vlbw;
bool _is_managed;
+ bool _vector_masking; // For stub code use only
class InstructionAttr *_attributes;
@@ -813,6 +814,7 @@
_legacy_mode_vl = (VM_Version::supports_avx512vl() == false);
_legacy_mode_vlbw = (VM_Version::supports_avx512vlbw() == false);
_is_managed = false;
+ _vector_masking = false;
_attributes = NULL;
}
@@ -823,6 +825,12 @@
void clear_managed(void) { _is_managed = false; }
bool is_managed(void) { return _is_managed; }
+ // Following functions are for stub code use only: they toggle and query the assembler's _vector_masking flag, which the opmask-taking emitters assert on
+ void set_vector_masking(void) { _vector_masking = true; } // open a region in which opmask-taking emitters may be used
+ void clear_vector_masking(void) { _vector_masking = false; } // close that region again
+ bool is_vector_masking(void) { return _vector_masking; } // true between set_vector_masking() and clear_vector_masking()
+
+
void lea(Register dst, Address src);
void mov(Register dst, Register src);
@@ -1354,6 +1362,8 @@
void kortestdl(KRegister dst, KRegister src);
void kortestql(KRegister dst, KRegister src);
+ void ktestql(KRegister dst, KRegister src);
+
void movdl(XMMRegister dst, Register src);
void movdl(Register dst, XMMRegister src);
void movdl(XMMRegister dst, Address src);
@@ -1381,6 +1391,7 @@
void evmovdqub(Address dst, XMMRegister src, int vector_len);
void evmovdqub(XMMRegister dst, Address src, int vector_len);
void evmovdqub(XMMRegister dst, XMMRegister src, int vector_len);
+ void evmovdqub(KRegister mask, XMMRegister dst, Address src, int vector_len);
void evmovdquw(Address dst, XMMRegister src, int vector_len);
void evmovdquw(XMMRegister dst, Address src, int vector_len);
void evmovdquw(XMMRegister dst, XMMRegister src, int vector_len);
@@ -1534,6 +1545,7 @@
void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
+ void evpcmpeqb(KRegister mask, KRegister kdst, XMMRegister nds, Address src, int vector_len);
void pcmpeqw(XMMRegister dst, XMMRegister src);
void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
@@ -2098,7 +2110,8 @@
_evex_encoding(0),
_is_clear_context(false),
_is_extended_context(false),
- _current_assembler(NULL) {
+ _current_assembler(NULL),
+ _embedded_opmask_register_specifier(1) { // hard code k1, it will be initialized for now
if (UseAVX < 3) _legacy_mode = true;
}
@@ -2122,6 +2135,7 @@
int _evex_encoding;
bool _is_clear_context;
bool _is_extended_context;
+ int _embedded_opmask_register_specifier;
Assembler *_current_assembler;
@@ -2139,6 +2153,7 @@
int get_evex_encoding(void) const { return _evex_encoding; }
bool is_clear_context(void) const { return _is_clear_context; }
bool is_extended_context(void) const { return _is_extended_context; }
+ int get_embedded_opmask_register_specifier(void) const { return _embedded_opmask_register_specifier; } // opmask register number for the EVEX aaa bits; 1 (k1) unless changed through the setter
// Set the vector len manually
void set_vector_len(int vector_len) { _avx_vector_len = vector_len; }
@@ -2172,6 +2187,11 @@
}
}
+ // Set embedded opmask register specifier: records which k register is encoded as the opmask; only the low 3 bits of its encoding are kept.
+ void set_embedded_opmask_register_specifier(KRegister mask) {
+ _embedded_opmask_register_specifier = mask->encoding() & 0x7; // '->' matches how KRegister encodings are read elsewhere in the assembler
+ }
+
};
#endif // CPU_X86_VM_ASSEMBLER_X86_HPP
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Mon Apr 25 15:14:02 2016 +0200
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Wed Apr 27 13:37:07 2016 -0700
@@ -9425,6 +9425,7 @@
void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register length, Register log2_array_indxscale,
Register result, Register tmp1, Register tmp2, XMMRegister rymm0, XMMRegister rymm1, XMMRegister rymm2){
assert(UseSSE42Intrinsics, "SSE4.2 must be enabled.");
+ Label VECTOR64_LOOP, VECTOR64_TAIL, VECTOR64_NOT_EQUAL, VECTOR32_TAIL;
Label VECTOR32_LOOP, VECTOR16_LOOP, VECTOR8_LOOP, VECTOR4_LOOP;
Label VECTOR16_TAIL, VECTOR8_TAIL, VECTOR4_TAIL;
Label VECTOR32_NOT_EQUAL, VECTOR16_NOT_EQUAL, VECTOR8_NOT_EQUAL, VECTOR4_NOT_EQUAL;
@@ -9437,11 +9438,62 @@
shlq(length);
xorq(result, result);
+ if ((UseAVX > 2) &&
+ VM_Version::supports_avx512vlbw()) {
+ set_vector_masking(); // opening of the stub context for programming mask registers
+ cmpq(length, 64);
+ jcc(Assembler::less, VECTOR32_TAIL);
+ movq(tmp1, length);
+ andq(tmp1, 0x3F); // tail count
+ andq(length, ~(0x3F)); //vector count
+
+ bind(VECTOR64_LOOP);
+ // AVX512 code to compare 64 byte vectors.
+ evmovdqub(rymm0, Address(obja, result), Assembler::AVX_512bit);
+ evpcmpeqb(k7, rymm0, Address(objb, result), Assembler::AVX_512bit);
+ kortestql(k7, k7);
+ jcc(Assembler::aboveEqual, VECTOR64_NOT_EQUAL); // mismatch
+ addq(result, 64);
+ subq(length, 64);
+ jccb(Assembler::notZero, VECTOR64_LOOP);
+
+ //bind(VECTOR64_TAIL);
+ testq(tmp1, tmp1);
+ jcc(Assembler::zero, SAME_TILL_END);
+
+ bind(VECTOR64_TAIL);
+ // AVX512 code to compare the remaining tail of up to 63 bytes under an opmask.
+ // Save k1
+ kmovql(k3, k1);
+ mov64(tmp2, 0xFFFFFFFFFFFFFFFF);
+ shlxq(tmp2, tmp2, tmp1);
+ notq(tmp2);
+ kmovql(k1, tmp2);
+
+ evmovdqub(k1, rymm0, Address(obja, result), Assembler::AVX_512bit);
+ evpcmpeqb(k1, k7, rymm0, Address(objb, result), Assembler::AVX_512bit);
+
+ ktestql(k7, k1);
+ // Restore k1
+ kmovql(k1, k3);
+ jcc(Assembler::below, SAME_TILL_END); // not mismatch
+
+ bind(VECTOR64_NOT_EQUAL);
+ kmovql(tmp1, k7);
+ notq(tmp1);
+ tzcntq(tmp1, tmp1);
+ addq(result, tmp1);
+ shrq(result);
+ jmp(DONE);
+ bind(VECTOR32_TAIL);
+ clear_vector_masking(); // closing of the stub context for programming mask registers
+ }
+
cmpq(length, 8);
jcc(Assembler::equal, VECTOR8_LOOP);
jcc(Assembler::less, VECTOR4_TAIL);
- if (UseAVX >= 2){
+ if (UseAVX >= 2) {
cmpq(length, 16);
jcc(Assembler::equal, VECTOR16_LOOP);
@@ -9549,7 +9601,7 @@
jccb(Assembler::notZero, BYTES_NOT_EQUAL);//mismatch found
jmpb(SAME_TILL_END);
- if (UseAVX >= 2){
+ if (UseAVX >= 2) {
bind(VECTOR32_NOT_EQUAL);
vpcmpeqb(rymm2, rymm2, rymm2, Assembler::AVX_256bit);
vpcmpeqb(rymm0, rymm0, rymm1, Assembler::AVX_256bit);
@@ -9562,7 +9614,7 @@
}
bind(VECTOR16_NOT_EQUAL);
- if (UseAVX >= 2){
+ if (UseAVX >= 2) {
vpcmpeqb(rymm2, rymm2, rymm2, Assembler::AVX_128bit);
vpcmpeqb(rymm0, rymm0, rymm1, Assembler::AVX_128bit);
pxor(rymm0, rymm2);
@@ -9593,7 +9645,6 @@
bind(DONE);
}
-
//Helper functions for square_to_len()
/**