8153713: aarch64: improve short array clearing using store pair
author fyang
Tue, 12 Apr 2016 11:53:44 +0800
changeset 38037 31c22b526d30
parent 38036 f51b942d970c
child 38041 eae42e8b1a4b
8153713: aarch64: improve short array clearing using store pair Summary: aarch64: generate store pair instruction to clear short arrays Reviewed-by: aph
hotspot/src/cpu/aarch64/vm/aarch64.ad
hotspot/src/cpu/aarch64/vm/globals_aarch64.hpp
hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
--- a/hotspot/src/cpu/aarch64/vm/aarch64.ad	Thu Apr 14 08:32:39 2016 +0200
+++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad	Tue Apr 12 11:53:44 2016 +0800
@@ -13321,6 +13321,20 @@
   ins_pipe(pipe_class_memory);
 %}
 
+instruct clearArray_imm_reg(immL cnt, iRegP base, Universe dummy, rFlagsReg cr)
+%{  // ClearArray rule for the case where the count operand is a constant (immL)
+  match(Set dummy (ClearArray cnt base));
+
+  ins_cost(4 * INSN_COST);
+  format %{ "ClearArray $cnt, $base" %}
+
+  ins_encode %{
+    __ zero_words($base$$Register, (u_int64_t)$cnt$$constant);  // count is passed as a constant, not in a register
+  %}
+
+  ins_pipe(pipe_class_memory);
+%}
+
 // ============================================================================
 // Overflow Math Instructions
 
--- a/hotspot/src/cpu/aarch64/vm/globals_aarch64.hpp	Thu Apr 14 08:32:39 2016 +0200
+++ b/hotspot/src/cpu/aarch64/vm/globals_aarch64.hpp	Tue Apr 12 11:53:44 2016 +0800
@@ -76,7 +76,8 @@
 // avoid biased locking while we are bootstrapping the aarch64 build
 define_pd_global(bool, UseBiasedLocking, false);
 
-define_pd_global(intx, InitArrayShortSize, 18*BytesPerLong);
+// Clear short arrays bigger than one word in an arch-specific way
+define_pd_global(intx, InitArrayShortSize, BytesPerLong);
 
 #if defined(COMPILER1) || defined(COMPILER2)
 define_pd_global(intx, InlineSmallCode,          1000);
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Thu Apr 14 08:32:39 2016 +0200
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Tue Apr 12 11:53:44 2016 +0800
@@ -4677,6 +4677,39 @@
   fill_words(base, cnt, zr);
 }
 
+// Emit stores of zr that zero cnt words at base. base: Address of a buffer to be zeroed, 8 bytes aligned.
+// cnt:    Immediate count in 8-byte unit; up to ShortArraySize / BytesPerLong words are fully unrolled, larger counts use a loop.
+#define ShortArraySize (18 * BytesPerLong)  // unrolling threshold, in bytes
+void MacroAssembler::zero_words(Register base, u_int64_t cnt)
+{
+  int i = cnt & 1;  // store any odd word to start, leaving an even number of words for the stp pairs
+  if (i) str(zr, Address(base));
+
+  if (cnt <= ShortArraySize / BytesPerLong) {  // short case: straight-line code, one stp per remaining pair
+    for (; i < (int)cnt; i += 2)
+      stp(zr, zr, Address(base, i * wordSize));
+  } else {
+    const int unroll = 4; // Number of stp(zr, zr) instructions we'll unroll
+    int remainder = cnt % (2 * unroll);  // words not covered by whole loop iterations
+    for (; i < remainder; i += 2)  // clear those leftover words first, with straight-line stores
+      stp(zr, zr, Address(base, i * wordSize));
+
+    Label loop;
+    Register cnt_reg = rscratch1;  // overwritten on this path: words still to be cleared by the loop
+    Register loop_base = rscratch2;  // overwritten on this path: running store address
+    cnt = cnt - remainder;  // now a non-zero multiple of 2 * unroll, so the countdown below reaches exactly zero
+    mov(cnt_reg, cnt);
+    // adjust base and prebias by -2 * wordSize so we can pre-increment
+    add(loop_base, base, (remainder - 2) * wordSize);
+    bind(loop);
+    sub(cnt_reg, cnt_reg, 2 * unroll);  // each pass through the loop clears 2 * unroll words
+    for (i = 1; i < unroll; i++)  // assembly-time loop: emits the stp at +2, +4, ... words from loop_base
+      stp(zr, zr, Address(loop_base, 2 * i * wordSize));
+    stp(zr, zr, Address(pre(loop_base, 2 * unroll * wordSize)));  // last pair of the pass; pre-increment also advances loop_base
+    cbnz(cnt_reg, loop);
+  }
+}
+
 // base:   Address of a buffer to be filled, 8 bytes aligned.
 // cnt:    Count in 8-byte unit.
 // value:  Value to be filled with.
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Thu Apr 14 08:32:39 2016 +0200
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Tue Apr 12 11:53:44 2016 +0800
@@ -1186,6 +1186,7 @@
 
   void fill_words(Register base, Register cnt, Register value);
   void zero_words(Register base, Register cnt);
+  void zero_words(Register base, u_int64_t cnt);  // overload taking the count as an immediate value instead of a register
 
   void encode_iso_array(Register src, Register dst,
                         Register len, Register result,