--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Tue Nov 22 11:34:11 2016 -0800
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Tue Nov 22 13:59:45 2016 -0800
@@ -10773,16 +10773,13 @@
// save length for return
push(len);
- // 8165287: EVEX version disabled for now, needs to be refactored as
- // it is returning incorrect results.
if ((UseAVX > 2) && // AVX512
- 0 &&
VM_Version::supports_avx512vlbw() &&
VM_Version::supports_bmi2()) {
set_vector_masking(); // opening of the stub context for programming mask registers
- Label copy_32_loop, copy_loop_tail, copy_just_portion_of_candidates;
+ Label copy_32_loop, copy_loop_tail, restore_k1_return_zero;
// alignement
Label post_alignement;
@@ -10797,16 +10794,16 @@
movl(result, 0x00FF);
evpbroadcastw(tmp2Reg, result, Assembler::AVX_512bit);
- testl(len, -64);
- jcc(Assembler::zero, post_alignement);
-
// Save k1
kmovql(k3, k1);
+ testl(len, -64);
+ jcc(Assembler::zero, post_alignement);
+
movl(tmp5, dst);
- andl(tmp5, (64 - 1));
+ andl(tmp5, (32 - 1));
negl(tmp5);
- andl(tmp5, (64 - 1));
+ andl(tmp5, (32 - 1));
// bail out when there is nothing to be done
testl(tmp5, 0xFFFFFFFF);
@@ -10816,13 +10813,12 @@
movl(result, 0xFFFFFFFF);
shlxl(result, result, tmp5);
notl(result);
-
kmovdl(k1, result);
evmovdquw(tmp1Reg, k1, Address(src, 0), Assembler::AVX_512bit);
evpcmpuw(k2, k1, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
ktestd(k2, k1);
- jcc(Assembler::carryClear, copy_just_portion_of_candidates);
+ jcc(Assembler::carryClear, restore_k1_return_zero);
evpmovwb(Address(dst, 0), k1, tmp1Reg, Assembler::AVX_512bit);
@@ -10835,7 +10831,7 @@
// end of alignement
movl(tmp5, len);
- andl(tmp5, (32 - 1)); // tail count (in chars)
+ andl(tmp5, (32 - 1)); // tail count (in chars)
andl(len, ~(32 - 1)); // vector count (in chars)
jcc(Assembler::zero, copy_loop_tail);
@@ -10847,7 +10843,7 @@
evmovdquw(tmp1Reg, Address(src, len, Address::times_2), Assembler::AVX_512bit);
evpcmpuw(k2, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
kortestdl(k2, k2);
- jcc(Assembler::carryClear, copy_just_portion_of_candidates);
+ jcc(Assembler::carryClear, restore_k1_return_zero);
// All elements in current processed chunk are valid candidates for
// compression. Write a truncated byte elements to the memory.
@@ -10858,11 +10854,10 @@
bind(copy_loop_tail);
// bail out when there is nothing to be done
testl(tmp5, 0xFFFFFFFF);
+ // Restore k1
+ kmovql(k1, k3);
jcc(Assembler::zero, return_length);
- // Save k1
- kmovql(k3, k1);
-
movl(len, tmp5);
// ~(~0 << len), where len is the # of remaining elements to process
@@ -10875,30 +10870,16 @@
evmovdquw(tmp1Reg, k1, Address(src, 0), Assembler::AVX_512bit);
evpcmpuw(k2, k1, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
ktestd(k2, k1);
- jcc(Assembler::carryClear, copy_just_portion_of_candidates);
+ jcc(Assembler::carryClear, restore_k1_return_zero);
evpmovwb(Address(dst, 0), k1, tmp1Reg, Assembler::AVX_512bit);
// Restore k1
kmovql(k1, k3);
-
jmp(return_length);
- bind(copy_just_portion_of_candidates);
- kmovdl(tmp5, k2);
- tzcntl(tmp5, tmp5);
-
- // ~(~0 << tmp5), where tmp5 is a number of elements in an array from the
- // result to the first element larger than 0xFF
- movl(result, 0xFFFFFFFF);
- shlxl(result, result, tmp5);
- notl(result);
-
- kmovdl(k1, result);
-
- evpmovwb(Address(dst, 0), k1, tmp1Reg, Assembler::AVX_512bit);
+ bind(restore_k1_return_zero);
// Restore k1
kmovql(k1, k3);
-
jmp(return_zero);
clear_vector_masking(); // closing of the stub context for programming mask registers