8155162: java.util.zip.CRC32C Interpreter/C1 intrinsics support on SPARC
authorkvn
Wed, 04 May 2016 15:30:21 -0700
changeset 38237 d972e3a2df53
parent 38236 510f77046e00
child 38238 1bbcc430c78d
8155162: java.util.zip.CRC32C Interpreter/C1 intrinsics support on SPARC Reviewed-by: kvn Contributed-by: ahmed.khawaja@oracle.com
hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp
hotspot/src/cpu/ppc/vm/c1_LIRGenerator_ppc.cpp
hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp
hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp
hotspot/src/cpu/sparc/vm/macroAssembler_sparc.hpp
hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp
hotspot/src/cpu/sparc/vm/stubRoutines_sparc.cpp
hotspot/src/cpu/sparc/vm/stubRoutines_sparc.hpp
hotspot/src/cpu/sparc/vm/templateInterpreterGenerator_sparc.cpp
hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp
hotspot/src/share/vm/c1/c1_Compiler.cpp
hotspot/src/share/vm/c1/c1_LIRGenerator.cpp
hotspot/src/share/vm/c1/c1_LIRGenerator.hpp
hotspot/src/share/vm/c1/c1_Runtime1.cpp
--- a/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp	Fri Apr 29 17:24:16 2016 +0200
+++ b/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp	Wed May 04 15:30:21 2016 -0700
@@ -1029,6 +1029,10 @@
   }
 }
 
+void LIRGenerator::do_update_CRC32C(Intrinsic* x) {
+  Unimplemented();
+}
+
 // _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
 // _i2b, _i2c, _i2s
 void LIRGenerator::do_Convert(Convert* x) {
--- a/hotspot/src/cpu/ppc/vm/c1_LIRGenerator_ppc.cpp	Fri Apr 29 17:24:16 2016 +0200
+++ b/hotspot/src/cpu/ppc/vm/c1_LIRGenerator_ppc.cpp	Wed May 04 15:30:21 2016 -0700
@@ -1427,3 +1427,7 @@
     }
   }
 }
+
+void LIRGenerator::do_update_CRC32C(Intrinsic* x) {
+  Unimplemented();
+}
--- a/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp	Fri Apr 29 17:24:16 2016 +0200
+++ b/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp	Wed May 04 15:30:21 2016 -0700
@@ -868,6 +868,90 @@
   }
 }
 
+void LIRGenerator::do_update_CRC32C(Intrinsic* x) {
+  // Make all state_for calls early since they can emit code
+  LIR_Opr result = rlock_result(x);
+  int flags = 0;
+  switch (x->id()) {
+    case vmIntrinsics::_updateBytesCRC32C:
+    case vmIntrinsics::_updateDirectByteBufferCRC32C: {
+
+      bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32C);
+      int array_offset = is_updateBytes ? arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0;
+
+      LIRItem crc(x->argument_at(0), this);
+      LIRItem buf(x->argument_at(1), this);
+      LIRItem off(x->argument_at(2), this);
+      LIRItem end(x->argument_at(3), this);
+
+      buf.load_item();
+      off.load_nonconstant();
+      end.load_nonconstant();
+
+      // len = end - off
+      LIR_Opr len  = end.result();
+      LIR_Opr tmpA = new_register(T_INT);
+      LIR_Opr tmpB = new_register(T_INT);
+      __ move(end.result(), tmpA);
+      __ move(off.result(), tmpB);
+      __ sub(tmpA, tmpB, tmpA);
+      len = tmpA;
+
+      LIR_Opr index = off.result();
+
+      if(off.result()->is_constant()) {
+        index = LIR_OprFact::illegalOpr;
+        array_offset += off.result()->as_jint();
+      }
+
+      LIR_Opr base_op = buf.result();
+
+      if (index->is_valid()) {
+        LIR_Opr tmp = new_register(T_LONG);
+        __ convert(Bytecodes::_i2l, index, tmp);
+        index = tmp;
+        if (index->is_constant()) {
+          array_offset += index->as_constant_ptr()->as_jint();
+          index = LIR_OprFact::illegalOpr;
+        } else if (index->is_register()) {
+          LIR_Opr tmp2 = new_register(T_LONG);
+          LIR_Opr tmp3 = new_register(T_LONG);
+          __ move(base_op, tmp2);
+          __ move(index, tmp3);
+          __ add(tmp2, tmp3, tmp2);
+          base_op = tmp2;
+        } else {
+          ShouldNotReachHere();
+        }
+      }
+
+      LIR_Address* a = new LIR_Address(base_op, array_offset, T_BYTE);
+
+      BasicTypeList signature(3);
+      signature.append(T_INT);
+      signature.append(T_ADDRESS);
+      signature.append(T_INT);
+      CallingConvention* cc = frame_map()->c_calling_convention(&signature);
+      const LIR_Opr result_reg = result_register_for(x->type());
+
+      LIR_Opr addr = new_pointer_register();
+      __ leal(LIR_OprFact::address(a), addr);
+
+      crc.load_item_force(cc->at(0));
+      __ move(addr, cc->at(1));
+      __ move(len, cc->at(2));
+
+      __ call_runtime_leaf(StubRoutines::updateBytesCRC32C(), getThreadTemp(), result_reg, cc->args());
+      __ move(result_reg, result);
+
+      break;
+    }
+    default: {
+      ShouldNotReachHere();
+    }
+  }
+}
+
 // _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
 // _i2b, _i2c, _i2s
 void LIRGenerator::do_Convert(Convert* x) {
--- a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp	Fri Apr 29 17:24:16 2016 +0200
+++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp	Wed May 04 15:30:21 2016 -0700
@@ -4837,21 +4837,21 @@
 
 // Reverse byte order of lower 32 bits, assuming upper 32 bits all zeros
 void MacroAssembler::reverse_bytes_32(Register src, Register dst, Register tmp) {
-  srlx(src, 24, dst);
-
-  sllx(src, 32+8, tmp);
-  srlx(tmp, 32+24, tmp);
-  sllx(tmp, 8, tmp);
-  or3(dst, tmp, dst);
-
-  sllx(src, 32+16, tmp);
-  srlx(tmp, 32+24, tmp);
-  sllx(tmp, 16, tmp);
-  or3(dst, tmp, dst);
-
-  sllx(src, 32+24, tmp);
-  srlx(tmp, 32, tmp);
-  or3(dst, tmp, dst);
+    srlx(src, 24, dst);
+
+    sllx(src, 32+8, tmp);
+    srlx(tmp, 32+24, tmp);
+    sllx(tmp, 8, tmp);
+    or3(dst, tmp, dst);
+
+    sllx(src, 32+16, tmp);
+    srlx(tmp, 32+24, tmp);
+    sllx(tmp, 16, tmp);
+    or3(dst, tmp, dst);
+
+    sllx(src, 32+24, tmp);
+    srlx(tmp, 32, tmp);
+    or3(dst, tmp, dst);
 }
 
 void MacroAssembler::movitof_revbytes(Register src, FloatRegister dst, Register tmp1, Register tmp2) {
@@ -5103,3 +5103,176 @@
   not1(crc);
 }
 
+#define CHUNK_LEN   128          /* 128 x 8B = 1KB */
+#define CHUNK_K1    0x1307a0206  /* reverseBits(pow(x, CHUNK_LEN*8*8*3 - 32) mod P(x)) << 1 */
+#define CHUNK_K2    0x1a0f717c4  /* reverseBits(pow(x, CHUNK_LEN*8*8*2 - 32) mod P(x)) << 1 */
+#define CHUNK_K3    0x0170076fa  /* reverseBits(pow(x, CHUNK_LEN*8*8*1 - 32) mod P(x)) << 1 */
+
+void MacroAssembler::kernel_crc32c(Register crc, Register buf, Register len, Register table) {
+
+  Label L_crc32c_head, L_crc32c_aligned;
+  Label L_crc32c_parallel, L_crc32c_parallel_loop;
+  Label L_crc32c_serial, L_crc32c_x32_loop, L_crc32c_x8, L_crc32c_x8_loop;
+  Label L_crc32c_done, L_crc32c_tail, L_crc32c_return;
+
+  set(ExternalAddress(StubRoutines::crc32c_table_addr()), table);
+
+  cmp_and_br_short(len, 0, Assembler::lessEqual, Assembler::pn, L_crc32c_return);
+
+  // clear upper 32 bits of crc
+  clruwu(crc);
+
+  and3(buf, 7, G4);
+  cmp_and_brx_short(G4, 0, Assembler::equal, Assembler::pt, L_crc32c_aligned);
+
+  mov(8, G1);
+  sub(G1, G4, G4);
+
+  // ------ process the misaligned head (7 bytes or less) ------
+  bind(L_crc32c_head);
+
+  // crc = (crc >>> 8) ^ byteTable[(crc ^ b) & 0xFF];
+  ldub(buf, 0, G1);
+  update_byte_crc32(crc, G1, table);
+
+  inc(buf);
+  dec(len);
+  cmp_and_br_short(len, 0, Assembler::equal, Assembler::pn, L_crc32c_return);
+  dec(G4);
+  cmp_and_br_short(G4, 0, Assembler::greater, Assembler::pt, L_crc32c_head);
+
+  // ------ process the 8-byte-aligned body ------
+  bind(L_crc32c_aligned);
+  nop();
+  cmp_and_br_short(len, 8, Assembler::less, Assembler::pn, L_crc32c_tail);
+
+  // reverse the byte order of lower 32 bits to big endian, and move to FP side
+  movitof_revbytes(crc, F0, G1, G3);
+
+  set(CHUNK_LEN*8*4, G4);
+  cmp_and_br_short(len, G4, Assembler::less, Assembler::pt, L_crc32c_serial);
+
+  // ------ process four 1KB chunks in parallel ------
+  bind(L_crc32c_parallel);
+
+  fzero(FloatRegisterImpl::D, F2);
+  fzero(FloatRegisterImpl::D, F4);
+  fzero(FloatRegisterImpl::D, F6);
+
+  mov(CHUNK_LEN - 1, G4);
+  bind(L_crc32c_parallel_loop);
+  // schedule ldf's ahead of crc32c's to hide the load-use latency
+  ldf(FloatRegisterImpl::D, buf, 0,            F8);
+  ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*8,  F10);
+  ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*16, F12);
+  ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*24, F14);
+  crc32c(F0, F8,  F0);
+  crc32c(F2, F10, F2);
+  crc32c(F4, F12, F4);
+  crc32c(F6, F14, F6);
+  inc(buf, 8);
+  dec(G4);
+  cmp_and_br_short(G4, 0, Assembler::greater, Assembler::pt, L_crc32c_parallel_loop);
+
+  ldf(FloatRegisterImpl::D, buf, 0,            F8);
+  ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*8,  F10);
+  ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*16, F12);
+  crc32c(F0, F8,  F0);
+  crc32c(F2, F10, F2);
+  crc32c(F4, F12, F4);
+
+  inc(buf, CHUNK_LEN*24);
+  ldfl(FloatRegisterImpl::D, buf, G0, F14);  // load in little endian
+  inc(buf, 8);
+
+  prefetch(buf, 0,            Assembler::severalReads);
+  prefetch(buf, CHUNK_LEN*8,  Assembler::severalReads);
+  prefetch(buf, CHUNK_LEN*16, Assembler::severalReads);
+  prefetch(buf, CHUNK_LEN*24, Assembler::severalReads);
+
+  // move to INT side, and reverse the byte order of lower 32 bits to little endian
+  movftoi_revbytes(F0, O4, G1, G4);
+  movftoi_revbytes(F2, O5, G1, G4);
+  movftoi_revbytes(F4, G5, G1, G4);
+
+  // combine the results of 4 chunks
+  set64(CHUNK_K1, G3, G1);
+  xmulx(O4, G3, O4);
+  set64(CHUNK_K2, G3, G1);
+  xmulx(O5, G3, O5);
+  set64(CHUNK_K3, G3, G1);
+  xmulx(G5, G3, G5);
+
+  movdtox(F14, G4);
+  xor3(O4, O5, O5);
+  xor3(G5, O5, O5);
+  xor3(G4, O5, O5);
+
+  // reverse the byte order to big endian, via stack, and move to FP side
+  // TODO: use new revb instruction
+  add(SP, -8, G1);
+  srlx(G1, 3, G1);
+  sllx(G1, 3, G1);
+  stx(O5, G1, G0);
+  ldfl(FloatRegisterImpl::D, G1, G0, F2);  // load in little endian
+
+  crc32c(F6, F2, F0);
+
+  set(CHUNK_LEN*8*4, G4);
+  sub(len, G4, len);
+  cmp_and_br_short(len, G4, Assembler::greaterEqual, Assembler::pt, L_crc32c_parallel);
+  nop();
+  cmp_and_br_short(len, 0, Assembler::equal, Assembler::pt, L_crc32c_done);
+
+  bind(L_crc32c_serial);
+
+  mov(32, G4);
+  cmp_and_br_short(len, G4, Assembler::less, Assembler::pn, L_crc32c_x8);
+
+  // ------ process 32B chunks ------
+  bind(L_crc32c_x32_loop);
+  ldf(FloatRegisterImpl::D, buf, 0, F2);
+  crc32c(F0, F2, F0);
+  ldf(FloatRegisterImpl::D, buf, 8, F2);
+  crc32c(F0, F2, F0);
+  ldf(FloatRegisterImpl::D, buf, 16, F2);
+  crc32c(F0, F2, F0);
+  ldf(FloatRegisterImpl::D, buf, 24, F2);
+  inc(buf, 32);
+  crc32c(F0, F2, F0);
+  dec(len, 32);
+  cmp_and_br_short(len, G4, Assembler::greaterEqual, Assembler::pt, L_crc32c_x32_loop);
+
+  bind(L_crc32c_x8);
+  nop();
+  cmp_and_br_short(len, 8, Assembler::less, Assembler::pt, L_crc32c_done);
+
+  // ------ process 8B chunks ------
+  bind(L_crc32c_x8_loop);
+  ldf(FloatRegisterImpl::D, buf, 0, F2);
+  inc(buf, 8);
+  crc32c(F0, F2, F0);
+  dec(len, 8);
+  cmp_and_br_short(len, 8, Assembler::greaterEqual, Assembler::pt, L_crc32c_x8_loop);
+
+  bind(L_crc32c_done);
+
+  // move to INT side, and reverse the byte order of lower 32 bits to little endian
+  movftoi_revbytes(F0, crc, G1, G3);
+
+  cmp_and_br_short(len, 0, Assembler::equal, Assembler::pt, L_crc32c_return);
+
+  // ------ process the misaligned tail (7 bytes or less) ------
+  bind(L_crc32c_tail);
+
+  // crc = (crc >>> 8) ^ byteTable[(crc ^ b) & 0xFF];
+  ldub(buf, 0, G1);
+  update_byte_crc32(crc, G1, table);
+
+  inc(buf);
+  dec(len);
+  cmp_and_br_short(len, 0, Assembler::greater, Assembler::pt, L_crc32c_tail);
+
+  bind(L_crc32c_return);
+  nop();
+}
--- a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.hpp	Fri Apr 29 17:24:16 2016 +0200
+++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.hpp	Wed May 04 15:30:21 2016 -0700
@@ -1418,6 +1418,8 @@
   // Fold 8-bit data
   void fold_8bit_crc32(Register xcrc, Register table, Register xtmp, Register tmp);
   void fold_8bit_crc32(Register crc, Register table, Register tmp);
+  // CRC32C code for java.util.zip.CRC32C::updateBytes/updateDirectByteBuffer instrinsic.
+  void kernel_crc32c(Register crc, Register buf, Register len, Register table);
 
 };
 
--- a/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Fri Apr 29 17:24:16 2016 +0200
+++ b/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Wed May 04 15:30:21 2016 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -4909,11 +4909,6 @@
       return start;
   }
 
-#define CHUNK_LEN   128          /* 128 x 8B = 1KB */
-#define CHUNK_K1    0x1307a0206  /* reverseBits(pow(x, CHUNK_LEN*8*8*3 - 32) mod P(x)) << 1 */
-#define CHUNK_K2    0x1a0f717c4  /* reverseBits(pow(x, CHUNK_LEN*8*8*2 - 32) mod P(x)) << 1 */
-#define CHUNK_K3    0x0170076fa  /* reverseBits(pow(x, CHUNK_LEN*8*8*1 - 32) mod P(x)) << 1 */
-
   /**
    *  Arguments:
    *
@@ -4938,171 +4933,8 @@
     const Register len   = O2;  // number of bytes
     const Register table = O3;  // byteTable
 
-    Label L_crc32c_head, L_crc32c_aligned;
-    Label L_crc32c_parallel, L_crc32c_parallel_loop;
-    Label L_crc32c_serial, L_crc32c_x32_loop, L_crc32c_x8, L_crc32c_x8_loop;
-    Label L_crc32c_done, L_crc32c_tail, L_crc32c_return;
-
-    __ cmp_and_br_short(len, 0, Assembler::lessEqual, Assembler::pn, L_crc32c_return);
-
-    // clear upper 32 bits of crc
-    __ clruwu(crc);
-
-    __ and3(buf, 7, G4);
-    __ cmp_and_brx_short(G4, 0, Assembler::equal, Assembler::pt, L_crc32c_aligned);
-
-    __ mov(8, G1);
-    __ sub(G1, G4, G4);
-
-    // ------ process the misaligned head (7 bytes or less) ------
-    __ BIND(L_crc32c_head);
-
-    // crc = (crc >>> 8) ^ byteTable[(crc ^ b) & 0xFF];
-    __ ldub(buf, 0, G1);
-    __ update_byte_crc32(crc, G1, table);
-
-    __ inc(buf);
-    __ dec(len);
-    __ cmp_and_br_short(len, 0, Assembler::equal, Assembler::pn, L_crc32c_return);
-    __ dec(G4);
-    __ cmp_and_br_short(G4, 0, Assembler::greater, Assembler::pt, L_crc32c_head);
-
-    // ------ process the 8-byte-aligned body ------
-    __ BIND(L_crc32c_aligned);
-    __ nop();
-    __ cmp_and_br_short(len, 8, Assembler::less, Assembler::pn, L_crc32c_tail);
-
-    // reverse the byte order of lower 32 bits to big endian, and move to FP side
-    __ movitof_revbytes(crc, F0, G1, G3);
-
-    __ set(CHUNK_LEN*8*4, G4);
-    __ cmp_and_br_short(len, G4, Assembler::less, Assembler::pt, L_crc32c_serial);
-
-    // ------ process four 1KB chunks in parallel ------
-    __ BIND(L_crc32c_parallel);
-
-    __ fzero(FloatRegisterImpl::D, F2);
-    __ fzero(FloatRegisterImpl::D, F4);
-    __ fzero(FloatRegisterImpl::D, F6);
-
-    __ mov(CHUNK_LEN - 1, G4);
-    __ BIND(L_crc32c_parallel_loop);
-    // schedule ldf's ahead of crc32c's to hide the load-use latency
-    __ ldf(FloatRegisterImpl::D, buf, 0,            F8);
-    __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*8,  F10);
-    __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*16, F12);
-    __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*24, F14);
-    __ crc32c(F0, F8,  F0);
-    __ crc32c(F2, F10, F2);
-    __ crc32c(F4, F12, F4);
-    __ crc32c(F6, F14, F6);
-    __ inc(buf, 8);
-    __ dec(G4);
-    __ cmp_and_br_short(G4, 0, Assembler::greater, Assembler::pt, L_crc32c_parallel_loop);
-
-    __ ldf(FloatRegisterImpl::D, buf, 0,            F8);
-    __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*8,  F10);
-    __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*16, F12);
-    __ crc32c(F0, F8,  F0);
-    __ crc32c(F2, F10, F2);
-    __ crc32c(F4, F12, F4);
-
-    __ inc(buf, CHUNK_LEN*24);
-    __ ldfl(FloatRegisterImpl::D, buf, G0, F14);  // load in little endian
-    __ inc(buf, 8);
-
-    __ prefetch(buf, 0,            Assembler::severalReads);
-    __ prefetch(buf, CHUNK_LEN*8,  Assembler::severalReads);
-    __ prefetch(buf, CHUNK_LEN*16, Assembler::severalReads);
-    __ prefetch(buf, CHUNK_LEN*24, Assembler::severalReads);
-
-    // move to INT side, and reverse the byte order of lower 32 bits to little endian
-    __ movftoi_revbytes(F0, O4, G1, G4);
-    __ movftoi_revbytes(F2, O5, G1, G4);
-    __ movftoi_revbytes(F4, G5, G1, G4);
-
-    // combine the results of 4 chunks
-    __ set64(CHUNK_K1, G3, G1);
-    __ xmulx(O4, G3, O4);
-    __ set64(CHUNK_K2, G3, G1);
-    __ xmulx(O5, G3, O5);
-    __ set64(CHUNK_K3, G3, G1);
-    __ xmulx(G5, G3, G5);
-
-    __ movdtox(F14, G4);
-    __ xor3(O4, O5, O5);
-    __ xor3(G5, O5, O5);
-    __ xor3(G4, O5, O5);
-
-    // reverse the byte order to big endian, via stack, and move to FP side
-    __ add(SP, -8, G1);
-    __ srlx(G1, 3, G1);
-    __ sllx(G1, 3, G1);
-    __ stx(O5, G1, G0);
-    __ ldfl(FloatRegisterImpl::D, G1, G0, F2);  // load in little endian
-
-    __ crc32c(F6, F2, F0);
-
-    __ set(CHUNK_LEN*8*4, G4);
-    __ sub(len, G4, len);
-    __ cmp_and_br_short(len, G4, Assembler::greaterEqual, Assembler::pt, L_crc32c_parallel);
-    __ nop();
-    __ cmp_and_br_short(len, 0, Assembler::equal, Assembler::pt, L_crc32c_done);
-
-    __ BIND(L_crc32c_serial);
-
-    __ mov(32, G4);
-    __ cmp_and_br_short(len, G4, Assembler::less, Assembler::pn, L_crc32c_x8);
-
-    // ------ process 32B chunks ------
-    __ BIND(L_crc32c_x32_loop);
-    __ ldf(FloatRegisterImpl::D, buf, 0, F2);
-    __ inc(buf, 8);
-    __ crc32c(F0, F2, F0);
-    __ ldf(FloatRegisterImpl::D, buf, 0, F2);
-    __ inc(buf, 8);
-    __ crc32c(F0, F2, F0);
-    __ ldf(FloatRegisterImpl::D, buf, 0, F2);
-    __ inc(buf, 8);
-    __ crc32c(F0, F2, F0);
-    __ ldf(FloatRegisterImpl::D, buf, 0, F2);
-    __ inc(buf, 8);
-    __ crc32c(F0, F2, F0);
-    __ dec(len, 32);
-    __ cmp_and_br_short(len, G4, Assembler::greaterEqual, Assembler::pt, L_crc32c_x32_loop);
-
-    __ BIND(L_crc32c_x8);
-    __ nop();
-    __ cmp_and_br_short(len, 8, Assembler::less, Assembler::pt, L_crc32c_done);
-
-    // ------ process 8B chunks ------
-    __ BIND(L_crc32c_x8_loop);
-    __ ldf(FloatRegisterImpl::D, buf, 0, F2);
-    __ inc(buf, 8);
-    __ crc32c(F0, F2, F0);
-    __ dec(len, 8);
-    __ cmp_and_br_short(len, 8, Assembler::greaterEqual, Assembler::pt, L_crc32c_x8_loop);
-
-    __ BIND(L_crc32c_done);
-
-    // move to INT side, and reverse the byte order of lower 32 bits to little endian
-    __ movftoi_revbytes(F0, crc, G1, G3);
-
-    __ cmp_and_br_short(len, 0, Assembler::equal, Assembler::pt, L_crc32c_return);
-
-    // ------ process the misaligned tail (7 bytes or less) ------
-    __ BIND(L_crc32c_tail);
-
-    // crc = (crc >>> 8) ^ byteTable[(crc ^ b) & 0xFF];
-    __ ldub(buf, 0, G1);
-    __ update_byte_crc32(crc, G1, table);
-
-    __ inc(buf);
-    __ dec(len);
-    __ cmp_and_br_short(len, 0, Assembler::greater, Assembler::pt, L_crc32c_tail);
-
-    __ BIND(L_crc32c_return);
-    __ nop();
+    __ kernel_crc32c(crc, buf, len, table);
+
     __ retl();
     __ delayed()->nop();
 
@@ -5366,6 +5198,12 @@
       StubRoutines::_crc_table_adr = (address)StubRoutines::Sparc::_crc_table;
       StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
     }
+
+    if (UseCRC32CIntrinsics) {
+      // set table address before stub generation which use it
+      StubRoutines::_crc32c_table_addr = (address)StubRoutines::Sparc::_crc32c_table;
+      StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C();
+    }
   }
 
 
@@ -5425,12 +5263,6 @@
       StubRoutines::_sha512_implCompress   = generate_sha512_implCompress(false, "sha512_implCompress");
       StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true,  "sha512_implCompressMB");
     }
-
-    // generate CRC32C intrinsic code
-    if (UseCRC32CIntrinsics) {
-      StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C();
-    }
-
     // generate Adler32 intrinsics code
     if (UseAdler32Intrinsics) {
       StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32();
--- a/hotspot/src/cpu/sparc/vm/stubRoutines_sparc.cpp	Fri Apr 29 17:24:16 2016 +0200
+++ b/hotspot/src/cpu/sparc/vm/stubRoutines_sparc.cpp	Wed May 04 15:30:21 2016 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -147,3 +147,62 @@
     0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
     0x2d02ef8dUL
 };
+
+/**
+ * CRC32C constants lookup table
+ */
+juint StubRoutines::Sparc::_crc32c_table[] =
+{
+    0x00000000UL, 0xF26B8303UL, 0xE13B70F7UL, 0x1350F3F4UL, 0xC79A971FUL,
+    0x35F1141CUL, 0x26A1E7E8UL, 0xD4CA64EBUL, 0x8AD958CFUL, 0x78B2DBCCUL,
+    0x6BE22838UL, 0x9989AB3BUL, 0x4D43CFD0UL, 0xBF284CD3UL, 0xAC78BF27UL,
+    0x5E133C24UL, 0x105EC76FUL, 0xE235446CUL, 0xF165B798UL, 0x030E349BUL,
+    0xD7C45070UL, 0x25AFD373UL, 0x36FF2087UL, 0xC494A384UL, 0x9A879FA0UL,
+    0x68EC1CA3UL, 0x7BBCEF57UL, 0x89D76C54UL, 0x5D1D08BFUL, 0xAF768BBCUL,
+    0xBC267848UL, 0x4E4DFB4BUL, 0x20BD8EDEUL, 0xD2D60DDDUL, 0xC186FE29UL,
+    0x33ED7D2AUL, 0xE72719C1UL, 0x154C9AC2UL, 0x061C6936UL, 0xF477EA35UL,
+    0xAA64D611UL, 0x580F5512UL, 0x4B5FA6E6UL, 0xB93425E5UL, 0x6DFE410EUL,
+    0x9F95C20DUL, 0x8CC531F9UL, 0x7EAEB2FAUL, 0x30E349B1UL, 0xC288CAB2UL,
+    0xD1D83946UL, 0x23B3BA45UL, 0xF779DEAEUL, 0x05125DADUL, 0x1642AE59UL,
+    0xE4292D5AUL, 0xBA3A117EUL, 0x4851927DUL, 0x5B016189UL, 0xA96AE28AUL,
+    0x7DA08661UL, 0x8FCB0562UL, 0x9C9BF696UL, 0x6EF07595UL, 0x417B1DBCUL,
+    0xB3109EBFUL, 0xA0406D4BUL, 0x522BEE48UL, 0x86E18AA3UL, 0x748A09A0UL,
+    0x67DAFA54UL, 0x95B17957UL, 0xCBA24573UL, 0x39C9C670UL, 0x2A993584UL,
+    0xD8F2B687UL, 0x0C38D26CUL, 0xFE53516FUL, 0xED03A29BUL, 0x1F682198UL,
+    0x5125DAD3UL, 0xA34E59D0UL, 0xB01EAA24UL, 0x42752927UL, 0x96BF4DCCUL,
+    0x64D4CECFUL, 0x77843D3BUL, 0x85EFBE38UL, 0xDBFC821CUL, 0x2997011FUL,
+    0x3AC7F2EBUL, 0xC8AC71E8UL, 0x1C661503UL, 0xEE0D9600UL, 0xFD5D65F4UL,
+    0x0F36E6F7UL, 0x61C69362UL, 0x93AD1061UL, 0x80FDE395UL, 0x72966096UL,
+    0xA65C047DUL, 0x5437877EUL, 0x4767748AUL, 0xB50CF789UL, 0xEB1FCBADUL,
+    0x197448AEUL, 0x0A24BB5AUL, 0xF84F3859UL, 0x2C855CB2UL, 0xDEEEDFB1UL,
+    0xCDBE2C45UL, 0x3FD5AF46UL, 0x7198540DUL, 0x83F3D70EUL, 0x90A324FAUL,
+    0x62C8A7F9UL, 0xB602C312UL, 0x44694011UL, 0x5739B3E5UL, 0xA55230E6UL,
+    0xFB410CC2UL, 0x092A8FC1UL, 0x1A7A7C35UL, 0xE811FF36UL, 0x3CDB9BDDUL,
+    0xCEB018DEUL, 0xDDE0EB2AUL, 0x2F8B6829UL, 0x82F63B78UL, 0x709DB87BUL,
+    0x63CD4B8FUL, 0x91A6C88CUL, 0x456CAC67UL, 0xB7072F64UL, 0xA457DC90UL,
+    0x563C5F93UL, 0x082F63B7UL, 0xFA44E0B4UL, 0xE9141340UL, 0x1B7F9043UL,
+    0xCFB5F4A8UL, 0x3DDE77ABUL, 0x2E8E845FUL, 0xDCE5075CUL, 0x92A8FC17UL,
+    0x60C37F14UL, 0x73938CE0UL, 0x81F80FE3UL, 0x55326B08UL, 0xA759E80BUL,
+    0xB4091BFFUL, 0x466298FCUL, 0x1871A4D8UL, 0xEA1A27DBUL, 0xF94AD42FUL,
+    0x0B21572CUL, 0xDFEB33C7UL, 0x2D80B0C4UL, 0x3ED04330UL, 0xCCBBC033UL,
+    0xA24BB5A6UL, 0x502036A5UL, 0x4370C551UL, 0xB11B4652UL, 0x65D122B9UL,
+    0x97BAA1BAUL, 0x84EA524EUL, 0x7681D14DUL, 0x2892ED69UL, 0xDAF96E6AUL,
+    0xC9A99D9EUL, 0x3BC21E9DUL, 0xEF087A76UL, 0x1D63F975UL, 0x0E330A81UL,
+    0xFC588982UL, 0xB21572C9UL, 0x407EF1CAUL, 0x532E023EUL, 0xA145813DUL,
+    0x758FE5D6UL, 0x87E466D5UL, 0x94B49521UL, 0x66DF1622UL, 0x38CC2A06UL,
+    0xCAA7A905UL, 0xD9F75AF1UL, 0x2B9CD9F2UL, 0xFF56BD19UL, 0x0D3D3E1AUL,
+    0x1E6DCDEEUL, 0xEC064EEDUL, 0xC38D26C4UL, 0x31E6A5C7UL, 0x22B65633UL,
+    0xD0DDD530UL, 0x0417B1DBUL, 0xF67C32D8UL, 0xE52CC12CUL, 0x1747422FUL,
+    0x49547E0BUL, 0xBB3FFD08UL, 0xA86F0EFCUL, 0x5A048DFFUL, 0x8ECEE914UL,
+    0x7CA56A17UL, 0x6FF599E3UL, 0x9D9E1AE0UL, 0xD3D3E1ABUL, 0x21B862A8UL,
+    0x32E8915CUL, 0xC083125FUL, 0x144976B4UL, 0xE622F5B7UL, 0xF5720643UL,
+    0x07198540UL, 0x590AB964UL, 0xAB613A67UL, 0xB831C993UL, 0x4A5A4A90UL,
+    0x9E902E7BUL, 0x6CFBAD78UL, 0x7FAB5E8CUL, 0x8DC0DD8FUL, 0xE330A81AUL,
+    0x115B2B19UL, 0x020BD8EDUL, 0xF0605BEEUL, 0x24AA3F05UL, 0xD6C1BC06UL,
+    0xC5914FF2UL, 0x37FACCF1UL, 0x69E9F0D5UL, 0x9B8273D6UL, 0x88D28022UL,
+    0x7AB90321UL, 0xAE7367CAUL, 0x5C18E4C9UL, 0x4F48173DUL, 0xBD23943EUL,
+    0xF36E6F75UL, 0x0105EC76UL, 0x12551F82UL, 0xE03E9C81UL, 0x34F4F86AUL,
+    0xC69F7B69UL, 0xD5CF889DUL, 0x27A40B9EUL, 0x79B737BAUL, 0x8BDCB4B9UL,
+    0x988C474DUL, 0x6AE7C44EUL, 0xBE2DA0A5UL, 0x4C4623A6UL, 0x5F16D052UL,
+    0xAD7D5351UL
+};
--- a/hotspot/src/cpu/sparc/vm/stubRoutines_sparc.hpp	Fri Apr 29 17:24:16 2016 +0200
+++ b/hotspot/src/cpu/sparc/vm/stubRoutines_sparc.hpp	Wed May 04 15:30:21 2016 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -56,6 +56,7 @@
   // masks and table for CRC32
   static uint64_t _crc_by128_masks[];
   static juint    _crc_table[];
+  static juint    _crc32c_table[];
 
  public:
   // test assembler stop routine by setting registers
--- a/hotspot/src/cpu/sparc/vm/templateInterpreterGenerator_sparc.cpp	Fri Apr 29 17:24:16 2016 +0200
+++ b/hotspot/src/cpu/sparc/vm/templateInterpreterGenerator_sparc.cpp	Wed May 04 15:30:21 2016 -0700
@@ -1082,8 +1082,56 @@
   return NULL;
 }
 
-// Not supported
+/**
+ * Method entry for intrinsic-candidate (non-native) methods:
+ *   int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end)
+ *   int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long buf, int off, int end)
+ * Unlike CRC32, CRC32C does not have any methods marked as native
+ * CRC32C also uses an "end" variable instead of the length variable CRC32 uses
+ */
 address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
+
+  if (UseCRC32CIntrinsics) {
+    address entry = __ pc();
+
+    // Load parameters from the stack
+    const Register crc    = O0; // initial crc
+    const Register buf    = O1; // source java byte array address
+    const Register offset = O2; // offset
+    const Register end    = O3; // index of last element to process
+    const Register len    = O2; // len argument to the kernel
+    const Register table  = O3; // crc32c lookup table address
+
+    // Arguments are reversed on java expression stack
+    // Calculate address of start element
+    if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) {
+      __ lduw(Gargs, 0,  end);
+      __ lduw(Gargs, 8,  offset);
+      __ ldx( Gargs, 16, buf);
+      __ lduw(Gargs, 32, crc);
+      __ add(buf, offset, buf);
+      __ sub(end, offset, len);
+    } else {
+      __ lduw(Gargs, 0,  end);
+      __ lduw(Gargs, 8,  offset);
+      __ ldx( Gargs, 16, buf);
+      __ lduw(Gargs, 24, crc);
+      __ add(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE), buf); // account for the header size
+      __ add(buf, offset, buf);
+      __ sub(end, offset, len);
+    }
+
+    // Call the crc32c kernel
+    __ MacroAssembler::save_thread(L7_thread_cache);
+    __ kernel_crc32c(crc, buf, len, table);
+    __ MacroAssembler::restore_thread(L7_thread_cache);
+
+    // result in O0
+    __ retl();
+    __ delayed()->nop();
+
+    return entry;
+  }
   return NULL;
 }
 
--- a/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Fri Apr 29 17:24:16 2016 +0200
+++ b/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Wed May 04 15:30:21 2016 -0700
@@ -1108,6 +1108,10 @@
   }
 }
 
+void LIRGenerator::do_update_CRC32C(Intrinsic* x) {
+  Unimplemented();
+}
+
 // _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
 // _i2b, _i2c, _i2s
 LIR_Opr fixed_register_for(BasicType type) {
--- a/hotspot/src/share/vm/c1/c1_Compiler.cpp	Fri Apr 29 17:24:16 2016 +0200
+++ b/hotspot/src/share/vm/c1/c1_Compiler.cpp	Wed May 04 15:30:21 2016 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -224,6 +224,10 @@
   case vmIntrinsics::_updateCRC32:
   case vmIntrinsics::_updateBytesCRC32:
   case vmIntrinsics::_updateByteBufferCRC32:
+#ifdef SPARC
+  case vmIntrinsics::_updateBytesCRC32C:
+  case vmIntrinsics::_updateDirectByteBufferCRC32C:
+#endif
   case vmIntrinsics::_compareAndSwapInt:
   case vmIntrinsics::_compareAndSwapObject:
   case vmIntrinsics::_getCharStringU:
--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp	Fri Apr 29 17:24:16 2016 +0200
+++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp	Wed May 04 15:30:21 2016 -0700
@@ -3174,6 +3174,11 @@
     do_update_CRC32(x);
     break;
 
+  case vmIntrinsics::_updateBytesCRC32C:
+  case vmIntrinsics::_updateDirectByteBufferCRC32C:
+    do_update_CRC32C(x);
+    break;
+
   default: ShouldNotReachHere(); break;
   }
 }
--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp	Fri Apr 29 17:24:16 2016 +0200
+++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp	Wed May 04 15:30:21 2016 -0700
@@ -253,6 +253,7 @@
   void do_FPIntrinsics(Intrinsic* x);
   void do_Reference_get(Intrinsic* x);
   void do_update_CRC32(Intrinsic* x);
+  void do_update_CRC32C(Intrinsic* x);
 
   LIR_Opr call_runtime(BasicTypeArray* signature, LIRItemList* args, address entry, ValueType* result_type, CodeEmitInfo* info);
   LIR_Opr call_runtime(BasicTypeArray* signature, LIR_OprList* args, address entry, ValueType* result_type, CodeEmitInfo* info);
--- a/hotspot/src/share/vm/c1/c1_Runtime1.cpp	Fri Apr 29 17:24:16 2016 +0200
+++ b/hotspot/src/share/vm/c1/c1_Runtime1.cpp	Wed May 04 15:30:21 2016 -0700
@@ -318,6 +318,7 @@
   FUNCTION_CASE(entry, TRACE_TIME_METHOD);
 #endif
   FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32());
+  FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32C());
   FUNCTION_CASE(entry, StubRoutines::dexp());
   FUNCTION_CASE(entry, StubRoutines::dlog());
   FUNCTION_CASE(entry, StubRoutines::dlog10());