8189177: AARCH64: Improve _updateBytesCRC32C intrinsic
author dchuyko
Fri, 17 Nov 2017 20:56:14 +0300
changeset 47915 d4af6b80aec3
parent 47914 564882d918d4
child 47916 bdbef8638948
child 47991 a474466c4fdb
8189177: AARCH64: Improve _updateBytesCRC32C intrinsic
Reviewed-by: drwhite, dsamersoff
src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp	Thu Nov 16 20:21:11 2017 -0500
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp	Fri Nov 17 20:56:14 2017 +0300
@@ -3228,6 +3228,102 @@
     mvnw(crc, crc);
 }
 
+void MacroAssembler::kernel_crc32c_using_crc32c(Register crc, Register buf,
+        Register len, Register tmp0, Register tmp1, Register tmp2,
+        Register tmp3) {
+    Label CRC_by64_loop, CRC_by4_loop, CRC_by1_loop, CRC_less64, CRC_by64_pre, CRC_by32_loop, CRC_less32, L_exit;
+    assert_different_registers(crc, buf, len, tmp0, tmp1, tmp2, tmp3);
+    // Emits code that folds len bytes at buf into crc via crc32cx/crc32cw/crc32cb; buf and len are modified, tmp0-tmp3 are scratch.
+    subs(len, len, 128);  // len = len - 128; sets the flags for the branch below
+    br(Assembler::GE, CRC_by64_pre);  // at least 128 bytes: use the 64-byte loop
+  BIND(CRC_less64);  // entered with len == (bytes remaining - 128)
+    adds(len, len, 128-32);  // len is now (bytes remaining - 32)
+    br(Assembler::GE, CRC_by32_loop);  // at least 32 bytes remain
+  BIND(CRC_less32);  // entered with len == (bytes remaining - 32)
+    adds(len, len, 32-4);  // len is now (bytes remaining - 4)
+    br(Assembler::GE, CRC_by4_loop);  // at least 4 bytes remain
+    adds(len, len, 4);  // len is now the number of bytes remaining (0..3)
+    br(Assembler::GT, CRC_by1_loop);
+    b(L_exit);  // nothing left to process
+    // 32 bytes per iteration; entered with len == (bytes remaining - 32).
+  BIND(CRC_by32_loop);
+    ldp(tmp0, tmp1, Address(post(buf, 16)));
+    subs(len, len, 32);  // flags from this subs are consumed by the br(GE) at the end of the iteration
+    crc32cx(crc, crc, tmp0);
+    ldr(tmp2, Address(post(buf, 8)));
+    crc32cx(crc, crc, tmp1);
+    ldr(tmp3, Address(post(buf, 8)));
+    crc32cx(crc, crc, tmp2);
+    crc32cx(crc, crc, tmp3);
+    br(Assembler::GE, CRC_by32_loop);  // another full 32-byte block is available
+    cmn(len, 32);  // len == -32 means no bytes remain
+    br(Assembler::NE, CRC_less32);  // a tail of 1..31 bytes is left
+    b(L_exit);
+    // 4 bytes per iteration; entered with len == (bytes remaining - 4).
+  BIND(CRC_by4_loop);
+    ldrw(tmp0, Address(post(buf, 4)));
+    subs(len, len, 4);
+    crc32cw(crc, crc, tmp0);
+    br(Assembler::GE, CRC_by4_loop);
+    adds(len, len, 4);  // len is now the number of bytes remaining (0..3)
+    br(Assembler::LE, L_exit);
+  BIND(CRC_by1_loop);  // entered with len == bytes remaining, len > 0
+    ldrb(tmp0, Address(post(buf, 1)));
+    subs(len, len, 1);
+    crc32cb(crc, crc, tmp0);
+    br(Assembler::GT, CRC_by1_loop);
+    b(L_exit);
+    // Prologue of the 64-byte loop: loads 64 bytes, folds the first 48, and leaves the last two words pending in tmp2/tmp3.
+  BIND(CRC_by64_pre);
+    sub(buf, buf, 8);  // bias buf by -8 so a block is addressed at offsets 8..64 and the last load can pre-index by 64
+    ldp(tmp0, tmp1, Address(buf, 8));
+    crc32cx(crc, crc, tmp0);
+    ldr(tmp2, Address(buf, 24));
+    crc32cx(crc, crc, tmp1);
+    ldr(tmp3, Address(buf, 32));
+    crc32cx(crc, crc, tmp2);
+    ldr(tmp0, Address(buf, 40));
+    crc32cx(crc, crc, tmp3);
+    ldr(tmp1, Address(buf, 48));
+    crc32cx(crc, crc, tmp0);
+    ldr(tmp2, Address(buf, 56));
+    crc32cx(crc, crc, tmp1);
+    ldr(tmp3, Address(pre(buf, 64)));  // last word of the block; also advances buf by 64
+
+    b(CRC_by64_loop);  // jump over any padding emitted by align() below
+
+    align(CodeEntryAlignment);
+  BIND(CRC_by64_loop);  // each iteration first folds tmp2/tmp3 left pending by the previous block
+    subs(len, len, 64);  // accounts for the 64 bytes loaded in this iteration; flags consumed by the br(GE) below
+    crc32cx(crc, crc, tmp2);
+    ldr(tmp0, Address(buf, 8));
+    crc32cx(crc, crc, tmp3);
+    ldr(tmp1, Address(buf, 16));
+    crc32cx(crc, crc, tmp0);
+    ldr(tmp2, Address(buf, 24));
+    crc32cx(crc, crc, tmp1);
+    ldr(tmp3, Address(buf, 32));
+    crc32cx(crc, crc, tmp2);
+    ldr(tmp0, Address(buf, 40));
+    crc32cx(crc, crc, tmp3);
+    ldr(tmp1, Address(buf, 48));
+    crc32cx(crc, crc, tmp0);
+    ldr(tmp2, Address(buf, 56));
+    crc32cx(crc, crc, tmp1);
+    ldr(tmp3, Address(pre(buf, 64)));  // last word of the block; also advances buf by 64
+    br(Assembler::GE, CRC_by64_loop);  // another full 64-byte block is available
+
+    // post-loop: fold the two words still pending from the last block loaded
+    crc32cx(crc, crc, tmp2);
+    crc32cx(crc, crc, tmp3);
+
+    sub(len, len, 64);  // len is now (bytes remaining - 128), the form CRC_less64 expects
+    add(buf, buf, 8);  // undo the -8 bias applied in CRC_by64_pre
+    cmn(len, 128);  // len == -128 means no bytes remain
+    br(Assembler::NE, CRC_less64);  // otherwise process the tail with the smaller loops
+  BIND(L_exit);
+}
+
 /**
  * @param crc   register containing existing CRC (32-bit)
  * @param buf   register pointing to input byte buffer (byte*)
@@ -3238,54 +3334,9 @@
 void MacroAssembler::kernel_crc32c(Register crc, Register buf, Register len,
         Register table0, Register table1, Register table2, Register table3,
         Register tmp, Register tmp2, Register tmp3) {
-  Label L_exit;
-  Label CRC_by64_loop, CRC_by4_loop, CRC_by1_loop;
-
-    subs(len, len, 64);
-    br(Assembler::GE, CRC_by64_loop);
-    adds(len, len, 64-4);
-    br(Assembler::GE, CRC_by4_loop);
-    adds(len, len, 4);
-    br(Assembler::GT, CRC_by1_loop);
-    b(L_exit);
-
-  BIND(CRC_by4_loop);
-    ldrw(tmp, Address(post(buf, 4)));
-    subs(len, len, 4);
-    crc32cw(crc, crc, tmp);
-    br(Assembler::GE, CRC_by4_loop);
-    adds(len, len, 4);
-    br(Assembler::LE, L_exit);
-  BIND(CRC_by1_loop);
-    ldrb(tmp, Address(post(buf, 1)));
-    subs(len, len, 1);
-    crc32cb(crc, crc, tmp);
-    br(Assembler::GT, CRC_by1_loop);
-    b(L_exit);
-
-    align(CodeEntryAlignment);
-  BIND(CRC_by64_loop);
-    subs(len, len, 64);
-    ldp(tmp, tmp3, Address(post(buf, 16)));
-    crc32cx(crc, crc, tmp);
-    crc32cx(crc, crc, tmp3);
-    ldp(tmp, tmp3, Address(post(buf, 16)));
-    crc32cx(crc, crc, tmp);
-    crc32cx(crc, crc, tmp3);
-    ldp(tmp, tmp3, Address(post(buf, 16)));
-    crc32cx(crc, crc, tmp);
-    crc32cx(crc, crc, tmp3);
-    ldp(tmp, tmp3, Address(post(buf, 16)));
-    crc32cx(crc, crc, tmp);
-    crc32cx(crc, crc, tmp3);
-    br(Assembler::GE, CRC_by64_loop);
-    adds(len, len, 64-4);
-    br(Assembler::GE, CRC_by4_loop);
-    adds(len, len, 4);
-    br(Assembler::GT, CRC_by1_loop);
-  BIND(L_exit);
-    return;
-}
+  kernel_crc32c_using_crc32c(crc, buf, len, table0, table1, table2, table3);  // table0-table3 are forwarded as the four scratch registers; tmp, tmp2 and tmp3 are not used here
+}
+
 
 SkipIfEqual::SkipIfEqual(
     MacroAssembler* masm, const bool* flag_addr, bool value) {
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp	Thu Nov 16 20:21:11 2017 -0500
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp	Fri Nov 17 20:56:14 2017 +0300
@@ -1264,6 +1264,9 @@
   void kernel_crc32_using_crc32(Register crc, Register buf,
         Register len, Register tmp0, Register tmp1, Register tmp2,
         Register tmp3);
+  void kernel_crc32c_using_crc32c(Register crc, Register buf,
+        Register len, Register tmp0, Register tmp1, Register tmp2,
+        Register tmp3);  // same parameter list as kernel_crc32_using_crc32 above; defined in macroAssembler_aarch64.cpp
 public:
   void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z,
                        Register zlen, Register tmp1, Register tmp2, Register tmp3,