hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp
changeset 42661 2343dd675325
parent 42068 18c8a4d5998e
child 43428 3dccc891c216
--- a/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp	Tue Dec 13 17:58:19 2016 -0800
+++ b/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp	Wed Dec 14 10:28:52 2016 +0100
@@ -3332,53 +3332,90 @@
 }
 
 // Clear Array
+// For very short arrays: emits one std per dword, no loop; emits nothing if cnt_dwords <= 0. tmp == R0 is allowed.
+void MacroAssembler::clear_memory_unrolled(Register base_ptr, int cnt_dwords, Register tmp, int offset) {
+  if (cnt_dwords > 0) { li(tmp, 0); } // Zero the store source once; skipped when there is nothing to clear.
+  for (int i = 0; i < cnt_dwords; ++i) { std(tmp, offset + i * 8, base_ptr); } // Dword i at offset + i * 8 from base_ptr; base_ptr itself is not changed here.
+}
+
+// Version for short arrays whose length is a constant at code generation time. Kills base_ptr and uses CTR in the loop case. tmp == R0 is allowed.
+void MacroAssembler::clear_memory_constlen(Register base_ptr, int cnt_dwords, Register tmp) {
+  if (cnt_dwords < 8) { // Fewer than 8 dwords: fully unrolled stores, no loop.
+    clear_memory_unrolled(base_ptr, cnt_dwords, tmp);
+    return;
+  }
+
+  Label loop;
+  const long loopcnt   = cnt_dwords >> 1, // Loop iterations; each one clears 2 dwords.
+             remainder = cnt_dwords & 1;  // 1 if a single dword is left over after the loop.
+
+  li(tmp, loopcnt); // tmp first carries the trip count. NOTE(review): assumes loopcnt fits li's immediate range - confirm for callers outside this hunk.
+  mtctr(tmp);       // CTR = trip count.
+  li(tmp, 0);       // From here on tmp holds the zero that gets stored.
+  bind(loop);
+    std(tmp, 0, base_ptr);
+    std(tmp, 8, base_ptr);
+    addi(base_ptr, base_ptr, 16); // Advance past the 16 bytes just cleared.
+    bdnz(loop);                   // Decrement CTR and loop while it is non-zero.
+  if (remainder) { std(tmp, 0, base_ptr); } // Odd count: clear the final dword.
+}
+
 // Kills both input registers. tmp == R0 is allowed.
-void MacroAssembler::clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp) {
+void MacroAssembler::clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp, long const_cnt) { // const_cnt >= 0: dword count is a constant known while generating code; const_cnt < 0: count is in cnt_dwords.
   // Procedure for large arrays (uses data cache block zero instruction).
     Label startloop, fast, fastloop, small_rest, restloop, done;
     const int cl_size         = VM_Version::L1_data_cache_line_size(),
-              cl_dwords       = cl_size>>3,
+              cl_dwords       = cl_size >> 3,
               cl_dw_addr_bits = exact_log2(cl_dwords),
-              dcbz_min        = 1;                     // Min count of dcbz executions, needs to be >0.
-
-//2:
-    cmpdi(CCR1, cnt_dwords, ((dcbz_min+1)<<cl_dw_addr_bits)-1); // Big enough? (ensure >=dcbz_min lines included).
-    blt(CCR1, small_rest);                                      // Too small.
-    rldicl_(tmp, base_ptr, 64-3, 64-cl_dw_addr_bits);           // Extract dword offset within first cache line.
-    beq(CCR0, fast);                                            // Already 128byte aligned.
+              dcbz_min        = 1,  // Min count of dcbz executions, needs to be >0.
+              min_cnt         = ((dcbz_min + 1) << cl_dw_addr_bits) - 1; // Smallest dword count that takes the dcbz path; smaller counts use plain stores.
+
+  if (const_cnt >= 0) {
+    // Constant case: the size check is done here, while generating code.
+    if (const_cnt < min_cnt) {
+      clear_memory_constlen(base_ptr, const_cnt, tmp); // Too small for dcbz: emit the short constant-length variant and nothing else.
+      return;
+    }
+    load_const_optimized(cnt_dwords, const_cnt, tmp); // Put the constant count into cnt_dwords for the code below (tmp presumably serves as scratch - confirm).
+  } else {
+    // Variable case: cnt_dwords already loaded in register. Need to check size at run time.
+    cmpdi(CCR1, cnt_dwords, min_cnt); // Big enough? (ensure >= dcbz_min lines included).
+    blt(CCR1, small_rest);            // Too small: clear everything with the plain store loop.
+  }
+    rldicl_(tmp, base_ptr, 64-3, 64-cl_dw_addr_bits); // Extract dword offset within first cache line.
+    beq(CCR0, fast);                                  // Already cache line aligned (dword offset within the line is 0).
 
     subfic(tmp, tmp, cl_dwords);
     mtctr(tmp);                        // Set ctr to hit 128byte boundary (0<ctr<cl_dwords).
     subf(cnt_dwords, tmp, cnt_dwords); // rest.
     li(tmp, 0);
-//10:
+
   bind(startloop);                     // Clear at the beginning to reach 128byte boundary.
     std(tmp, 0, base_ptr);             // Clear 8byte aligned block.
     addi(base_ptr, base_ptr, 8);
     bdnz(startloop);
-//13:
+
   bind(fast);                                  // Clear 128byte blocks.
     srdi(tmp, cnt_dwords, cl_dw_addr_bits);    // Loop count for 128byte loop (>0).
     andi(cnt_dwords, cnt_dwords, cl_dwords-1); // Rest in dwords.
     mtctr(tmp);                                // Load counter.
-//16:
+
   bind(fastloop);
     dcbz(base_ptr);                    // Clear 128byte aligned block.
     addi(base_ptr, base_ptr, cl_size);
     bdnz(fastloop);
-    if (InsertEndGroupPPC64) { endgroup(); } else { nop(); }
-//20:
+
   bind(small_rest);
     cmpdi(CCR0, cnt_dwords, 0);        // size 0?
     beq(CCR0, done);                   // rest == 0
     li(tmp, 0);
     mtctr(cnt_dwords);                 // Load counter.
-//24:
+
   bind(restloop);                      // Clear rest.
     std(tmp, 0, base_ptr);             // Clear 8byte aligned block.
     addi(base_ptr, base_ptr, 8);
     bdnz(restloop);
-//27:
+
   bind(done);
 }