--- a/src/hotspot/cpu/x86/x86_32.ad Mon Jun 11 14:06:50 2018 -0700
+++ b/src/hotspot/cpu/x86/x86_32.ad Tue Jun 12 21:29:47 2018 -0700
@@ -11482,10 +11482,10 @@
// =======================================================================
// fast clearing of an array
-instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
+instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
predicate(!((ClearArrayNode*)n)->is_large());
match(Set dummy (ClearArray cnt base));
- effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
+ effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
format %{ $$template
$$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
@@ -11502,6 +11502,32 @@
if (UseFastStosb) {
$$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
$$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
+ } else if (UseXMMForObjInit) {
+ $$emit$$"MOV EDI,EAX\n\t"
+ $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
+ $$emit$$"JMP L_zero_64_bytes\n\t"
+ $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
+ $$emit$$"VMOVDQU YMM0,(EAX)\n\t"
+ $$emit$$"VMOVDQU YMM0,0x20(EAX)\n\t"
+ $$emit$$"ADD 0x40,EAX\n\t"
+ $$emit$$"# L_zero_64_bytes:\n\t"
+ $$emit$$"SUB 0x8,ECX\n\t"
+ $$emit$$"JGE L_loop\n\t"
+ $$emit$$"ADD 0x4,ECX\n\t"
+ $$emit$$"JL L_tail\n\t"
+ $$emit$$"VMOVDQU YMM0,(EAX)\n\t"
+ $$emit$$"ADD 0x20,EAX\n\t"
+ $$emit$$"SUB 0x4,ECX\n\t"
+ $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
+ $$emit$$"ADD 0x4,ECX\n\t"
+ $$emit$$"JLE L_end\n\t"
+ $$emit$$"DEC ECX\n\t"
+ $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
+ $$emit$$"VMOVQ XMM0,(EAX)\n\t"
+ $$emit$$"ADD 0x8,EAX\n\t"
+ $$emit$$"DEC ECX\n\t"
+ $$emit$$"JGE L_sloop\n\t"
+ $$emit$$"# L_end:\n\t"
} else {
$$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
$$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
@@ -11509,28 +11535,57 @@
$$emit$$"# DONE"
%}
ins_encode %{
- __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, false);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct rep_stos_large(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
+ __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
+ $tmp$$XMMRegister, false);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
predicate(((ClearArrayNode*)n)->is_large());
match(Set dummy (ClearArray cnt base));
- effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
+ effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
format %{ $$template
- $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
if (UseFastStosb) {
+ $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
$$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
$$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
+ } else if (UseXMMForObjInit) {
+ $$emit$$"MOV EDI,EAX\t# ClearArray:\n\t"
+ $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
+ $$emit$$"JMP L_zero_64_bytes\n\t"
+ $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
+ $$emit$$"VMOVDQU YMM0,(EAX)\n\t"
+ $$emit$$"VMOVDQU YMM0,0x20(EAX)\n\t"
+ $$emit$$"ADD 0x40,EAX\n\t"
+ $$emit$$"# L_zero_64_bytes:\n\t"
+ $$emit$$"SUB 0x8,ECX\n\t"
+ $$emit$$"JGE L_loop\n\t"
+ $$emit$$"ADD 0x4,ECX\n\t"
+ $$emit$$"JL L_tail\n\t"
+ $$emit$$"VMOVDQU YMM0,(EAX)\n\t"
+ $$emit$$"ADD 0x20,EAX\n\t"
+ $$emit$$"SUB 0x4,ECX\n\t"
+ $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
+ $$emit$$"ADD 0x4,ECX\n\t"
+ $$emit$$"JLE L_end\n\t"
+ $$emit$$"DEC ECX\n\t"
+ $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
+ $$emit$$"VMOVQ XMM0,(EAX)\n\t"
+ $$emit$$"ADD 0x8,EAX\n\t"
+ $$emit$$"DEC ECX\n\t"
+ $$emit$$"JGE L_sloop\n\t"
+ $$emit$$"# L_end:\n\t"
} else {
+ $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
$$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
$$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
}
$$emit$$"# DONE"
%}
ins_encode %{
- __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, true);
+ __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
+ $tmp$$XMMRegister, true);
%}
ins_pipe( pipe_slow );
%}