8130687: aarch64: add support for hardware crc32c
author enevill
Tue, 07 Jul 2015 15:04:25 +0000
changeset 31591 82134a118aea
parent 31590 427d073af867
child 31635 6bf7a358ca17
8130687: aarch64: add support for hardware crc32c Summary: add support for crc32c using built in crc32c instructions Reviewed-by: kvn, aph
hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp
hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp
--- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp	Mon Jun 29 15:30:55 2015 +0200
+++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp	Tue Jul 07 15:04:25 2015 +0000
@@ -2270,17 +2270,21 @@
   }
 
   // CRC32 instructions
-#define INSN(NAME, sf, sz)                                                \
+#define INSN(NAME, c, sf, sz)                                             \
   void NAME(Register Rd, Register Rn, Register Rm) {                      \
     starti;                                                               \
-    f(sf, 31), f(0b0011010110, 30, 21), f(0b0100, 15, 12), f(sz, 11, 10); \
-    rf(Rm, 16), rf(Rn, 5), rf(Rd, 0);                                     \
+    f(sf, 31), f(0b0011010110, 30, 21), f(0b010, 15, 13), f(c, 12);       \
+    f(sz, 11, 10), rf(Rm, 16), rf(Rn, 5), rf(Rd, 0);                      \
   }
 
-  INSN(crc32b, 0, 0b00);
-  INSN(crc32h, 0, 0b01);
-  INSN(crc32w, 0, 0b10);
-  INSN(crc32x, 1, 0b11);
+  INSN(crc32b,  0, 0, 0b00);  // arguments are (NAME, c, sf, sz); c = 0 leaves bit 12 clear: CRC32 forms
+  INSN(crc32h,  0, 0, 0b01);
+  INSN(crc32w,  0, 0, 0b10);
+  INSN(crc32x,  0, 1, 0b11);
+  INSN(crc32cb, 1, 0, 0b00);  // c = 1 sets bit 12 of the encoding: CRC32C forms
+  INSN(crc32ch, 1, 0, 0b01);
+  INSN(crc32cw, 1, 0, 0b10);
+  INSN(crc32cx, 1, 1, 0b11);
 
 #undef INSN
 
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Mon Jun 29 15:30:55 2015 +0200
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Tue Jul 07 15:04:25 2015 +0000
@@ -2914,6 +2914,65 @@
     ornw(crc, zr, crc);
 }
 
+/**
+ * @param crc   register containing existing CRC (32-bit); updated in place
+ * @param buf   register pointing to input byte buffer (byte*); advanced past the bytes consumed
+ * @param len   register containing number of bytes; clobbered
+ * @param table0..table3, tmp2  not referenced by this routine (no lookup table is read)
+ * @param tmp, tmp3  scratch registers
+ */
+void MacroAssembler::kernel_crc32c(Register crc, Register buf, Register len,
+        Register table0, Register table1, Register table2, Register table3,
+        Register tmp, Register tmp2, Register tmp3) {
+  Label L_exit;
+  Label CRC_by64_loop, CRC_by4_loop, CRC_by1_loop;
+
+    subs(len, len, 64);                      // len = remaining - 64; flags feed the branch below
+    br(Assembler::GE, CRC_by64_loop);        // at least 64 bytes: unrolled doubleword loop
+    adds(len, len, 64-4);                    // len = remaining - 4
+    br(Assembler::GE, CRC_by4_loop);         // at least 4 bytes: word loop
+    adds(len, len, 4);                       // len = remaining
+    br(Assembler::GT, CRC_by1_loop);         // 1..3 bytes: byte loop
+    b(L_exit);                               // nothing to process
+
+  BIND(CRC_by4_loop);                        // entered with len = remaining - 4 (>= 0)
+    ldrw(tmp, Address(post(buf, 4)));        // load 4 bytes, buf += 4
+    subs(len, len, 4);
+    crc32cw(crc, crc, tmp);
+    br(Assembler::GE, CRC_by4_loop);         // uses the flags set by the subs above
+    adds(len, len, 4);                       // len = remaining (bytes left after the word loop)
+    br(Assembler::LE, L_exit);
+  BIND(CRC_by1_loop);                        // entered with len = remaining (> 0)
+    ldrb(tmp, Address(post(buf, 1)));        // load 1 byte, buf += 1
+    subs(len, len, 1);
+    crc32cb(crc, crc, tmp);
+    br(Assembler::GT, CRC_by1_loop);
+    b(L_exit);
+
+    align(CodeEntryAlignment);
+  BIND(CRC_by64_loop);                       // entered with len = remaining - 64 (>= 0)
+    subs(len, len, 64);                      // flags: is another full 64-byte chunk left after this one?
+    ldp(tmp, tmp3, Address(post(buf, 16)));  // load 16 bytes, buf += 16
+    crc32cx(crc, crc, tmp);
+    crc32cx(crc, crc, tmp3);
+    ldp(tmp, tmp3, Address(post(buf, 16)));
+    crc32cx(crc, crc, tmp);
+    crc32cx(crc, crc, tmp3);
+    ldp(tmp, tmp3, Address(post(buf, 16)));
+    crc32cx(crc, crc, tmp);
+    crc32cx(crc, crc, tmp3);
+    ldp(tmp, tmp3, Address(post(buf, 16)));
+    crc32cx(crc, crc, tmp);
+    crc32cx(crc, crc, tmp3);
+    br(Assembler::GE, CRC_by64_loop);        // uses the flags from the subs at the top of the loop
+    adds(len, len, 64-4);                    // len = remaining - 4
+    br(Assembler::GE, CRC_by4_loop);
+    adds(len, len, 4);                       // len = remaining
+    br(Assembler::GT, CRC_by1_loop);
+  BIND(L_exit);
+    return;
+}
+
 SkipIfEqual::SkipIfEqual(
     MacroAssembler* masm, const bool* flag_addr, bool value) {
   _masm = masm;
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Mon Jun 29 15:30:55 2015 +0200
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Tue Jul 07 15:04:25 2015 +0000
@@ -967,6 +967,10 @@
   void kernel_crc32(Register crc, Register buf, Register len,
         Register table0, Register table1, Register table2, Register table3,
         Register tmp, Register tmp2, Register tmp3);
+  // CRC32C code for java.util.zip.CRC32C::updateBytes() intrinsic.
+  void kernel_crc32c(Register crc, Register buf, Register len,
+        Register table0, Register table1, Register table2, Register table3,
+        Register tmp, Register tmp2, Register tmp3);
 
 #undef VIRTUAL
 
--- a/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Mon Jun 29 15:30:55 2015 +0200
+++ b/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Tue Jul 07 15:04:25 2015 +0000
@@ -2359,6 +2359,47 @@
   /**
    *  Arguments:
    *
+   * Inputs:
+   *   c_rarg0   - int crc
+   *   c_rarg1   - byte* buf
+   *   c_rarg2   - int length
+   *   c_rarg3   - int* table (handed to kernel_crc32c as table0, which does not read it)
+   *
+   * Output:
+   *   c_rarg0   - int crc result
+   */
+  address generate_updateBytesCRC32C() {
+    assert(UseCRC32CIntrinsics, "what are we doing here?");
+
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32C");
+
+    address start = __ pc();
+
+    const Register crc   = c_rarg0;  // crc
+    const Register buf   = c_rarg1;  // source java byte array address
+    const Register len   = c_rarg2;  // length
+    const Register table0 = c_rarg3; // crc_table address
+    const Register table1 = c_rarg4;
+    const Register table2 = c_rarg5;
+    const Register table3 = c_rarg6;
+    const Register tmp3 = c_rarg7;   // scratch for the kernel
+
+    BLOCK_COMMENT("Entry:");
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+    __ kernel_crc32c(crc, buf, len,
+              table0, table1, table2, table3, rscratch1, rscratch2, tmp3); // rscratch1/rscratch2 serve as tmp/tmp2
+
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(lr);
+
+    return start;
+  }
+
+  /**
+   *  Arguments:
+   *
    *  Input:
    *    c_rarg0   - x address
    *    c_rarg1   - x length
@@ -2579,6 +2620,10 @@
       StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true,  "sha256_implCompressMB");
     }
 
+    if (UseCRC32CIntrinsics) {
+      StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C();
+    }
+
     // Safefetch stubs.
     generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
                                                        &StubRoutines::_safefetch32_fault_pc,
--- a/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp	Mon Jun 29 15:30:55 2015 +0200
+++ b/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp	Tue Jul 07 15:04:25 2015 +0000
@@ -199,9 +199,12 @@
     UseCRC32Intrinsics = true;
   }
 
-  if (UseCRC32CIntrinsics) {
-    if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics))
-      warning("CRC32C intrinsics are not available on this CPU");
+  if (auxv & HWCAP_CRC32) {
+    if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
+      FLAG_SET_DEFAULT(UseCRC32CIntrinsics, true);
+    }
+  } else if (UseCRC32CIntrinsics) {
+    warning("CRC32C is not available on the CPU");
     FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
   }