8155162: java.util.zip.CRC32C Interpreter/C1 intrinsics support on SPARC
Reviewed-by: kvn
Contributed-by: ahmed.khawaja@oracle.com
--- a/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp Fri Apr 29 17:24:16 2016 +0200
+++ b/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp Wed May 04 15:30:21 2016 -0700
@@ -1029,6 +1029,10 @@
}
}
+void LIRGenerator::do_update_CRC32C(Intrinsic* x) { // C1 CRC32C intrinsic hook: no aarch64 implementation in this change
+ Unimplemented(); // c1_Compiler.cpp only lists the CRC32C intrinsics under #ifdef SPARC
+}
+
// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
// _i2b, _i2c, _i2s
void LIRGenerator::do_Convert(Convert* x) {
--- a/hotspot/src/cpu/ppc/vm/c1_LIRGenerator_ppc.cpp Fri Apr 29 17:24:16 2016 +0200
+++ b/hotspot/src/cpu/ppc/vm/c1_LIRGenerator_ppc.cpp Wed May 04 15:30:21 2016 -0700
@@ -1427,3 +1427,7 @@
}
}
}
+
+void LIRGenerator::do_update_CRC32C(Intrinsic* x) { // C1 CRC32C intrinsic hook: no ppc implementation in this change
+ Unimplemented(); // c1_Compiler.cpp only lists the CRC32C intrinsics under #ifdef SPARC
+}
--- a/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp Fri Apr 29 17:24:16 2016 +0200
+++ b/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp Wed May 04 15:30:21 2016 -0700
@@ -868,6 +868,90 @@
}
}
+// C1 intrinsic for java.util.zip.CRC32C.updateBytes / updateDirectByteBuffer.
+// The arguments of x are (crc, buf, off, end). The generated LIR computes
+// len = end - off and the address buf + off (plus the byte[] header size for
+// the array variant), then makes a leaf call to the
+// StubRoutines::updateBytesCRC32C() stub with (crc, address, len) and moves
+// the stub's return value into the result operand of x.
+void LIRGenerator::do_update_CRC32C(Intrinsic* x) {
+  LIR_Opr result = rlock_result(x);
+  switch (x->id()) {
+    case vmIntrinsics::_updateBytesCRC32C:
+    case vmIntrinsics::_updateDirectByteBufferCRC32C: {
+
+      // Only the byte[] variant has an array header to skip over.
+      bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32C);
+      int array_offset = is_updateBytes ? arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0;
+
+      LIRItem crc(x->argument_at(0), this);
+      LIRItem buf(x->argument_at(1), this);
+      LIRItem off(x->argument_at(2), this);
+      LIRItem end(x->argument_at(3), this);
+
+      buf.load_item();
+      off.load_nonconstant();
+      end.load_nonconstant();
+
+      // len = end - off, computed in fresh temporaries
+      LIR_Opr len = new_register(T_INT);
+      LIR_Opr off_copy = new_register(T_INT);
+      __ move(end.result(), len);
+      __ move(off.result(), off_copy);
+      __ sub(len, off_copy, len);
+
+      // A constant offset is folded into the displacement; otherwise it is
+      // added to the base at runtime below.
+      LIR_Opr index = off.result();
+      if (index->is_constant()) {
+        array_offset += index->as_jint();
+        index = LIR_OprFact::illegalOpr;
+      }
+
+      LIR_Opr base_op = buf.result();
+
+      if (index->is_valid()) {
+        // Widen the int offset to 64 bits, then base = buf + (long)off.
+        LIR_Opr index64 = new_register(T_LONG);
+        __ convert(Bytecodes::_i2l, index, index64);
+        LIR_Opr base = new_register(T_LONG);
+        LIR_Opr addend = new_register(T_LONG);
+        __ move(base_op, base);
+        __ move(index64, addend);
+        __ add(base, addend, base);
+        base_op = base;
+      }
+
+      LIR_Address* a = new LIR_Address(base_op, array_offset, T_BYTE);
+
+      // C calling convention of the stub: (int crc, address buf, int len)
+      BasicTypeList signature(3);
+      signature.append(T_INT);
+      signature.append(T_ADDRESS);
+      signature.append(T_INT);
+      CallingConvention* cc = frame_map()->c_calling_convention(&signature);
+      const LIR_Opr result_reg = result_register_for(x->type());
+
+      // Materialize base + displacement into a pointer register for the call
+      LIR_Opr addr = new_pointer_register();
+      __ leal(LIR_OprFact::address(a), addr);
+
+      // Bind the three arguments to their C calling-convention locations
+      crc.load_item_force(cc->at(0));
+      __ move(addr, cc->at(1));
+      __ move(len, cc->at(2));
+
+      __ call_runtime_leaf(StubRoutines::updateBytesCRC32C(), getThreadTemp(), result_reg, cc->args());
+      __ move(result_reg, result);
+
+      break;
+    }
+    default: {
+      ShouldNotReachHere();
+    }
+  }
+}
+
// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
// _i2b, _i2c, _i2s
void LIRGenerator::do_Convert(Convert* x) {
--- a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp Fri Apr 29 17:24:16 2016 +0200
+++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp Wed May 04 15:30:21 2016 -0700
@@ -4837,21 +4837,21 @@
// Reverse byte order of lower 32 bits, assuming upper 32 bits all zeros
void MacroAssembler::reverse_bytes_32(Register src, Register dst, Register tmp) {
- srlx(src, 24, dst);
-
- sllx(src, 32+8, tmp);
- srlx(tmp, 32+24, tmp);
- sllx(tmp, 8, tmp);
- or3(dst, tmp, dst);
-
- sllx(src, 32+16, tmp);
- srlx(tmp, 32+24, tmp);
- sllx(tmp, 16, tmp);
- or3(dst, tmp, dst);
-
- sllx(src, 32+24, tmp);
- srlx(tmp, 32, tmp);
- or3(dst, tmp, dst);
+ srlx(src, 24, dst); // src bits 31..24 -> dst bits 7..0 (upper 32 bits of src are assumed zero)
+
+ sllx(src, 32+8, tmp); // src bits 23..16 -> bits 63..56, discarding everything above them
+ srlx(tmp, 32+24, tmp); // ... back down to bits 7..0, discarding everything below them
+ sllx(tmp, 8, tmp); // ... and up into bits 15..8
+ or3(dst, tmp, dst);
+
+ sllx(src, 32+16, tmp); // src bits 15..8 -> bits 63..56
+ srlx(tmp, 32+24, tmp); // ... isolated in bits 7..0
+ sllx(tmp, 16, tmp); // ... and up into bits 23..16
+ or3(dst, tmp, dst);
+
+ sllx(src, 32+24, tmp); // src bits 7..0 -> bits 63..56
+ srlx(tmp, 32, tmp); // ... and down into bits 31..24
+ or3(dst, tmp, dst); // dst now holds the four low bytes of src in reverse order
}
void MacroAssembler::movitof_revbytes(Register src, FloatRegister dst, Register tmp1, Register tmp2) {
@@ -5103,3 +5103,176 @@
not1(crc);
}
+#define CHUNK_LEN 128 /* 128 x 8B = 1KB */
+#define CHUNK_K1 0x1307a0206 /* reverseBits(pow(x, CHUNK_LEN*8*8*3 - 32) mod P(x)) << 1 */
+#define CHUNK_K2 0x1a0f717c4 /* reverseBits(pow(x, CHUNK_LEN*8*8*2 - 32) mod P(x)) << 1 */
+#define CHUNK_K3 0x0170076fa /* reverseBits(pow(x, CHUNK_LEN*8*8*1 - 32) mod P(x)) << 1 */
+
+void MacroAssembler::kernel_crc32c(Register crc, Register buf, Register len, Register table) { // emits CRC32C over len bytes at buf, seeded with crc; the result is left in crc
+ // Visible scratch use: G1, G3, G4, G5, O4, O5, even FP registers F0..F14 (as doubles) and 8 bytes below SP; buf and len are modified; called helpers may use more.
+ Label L_crc32c_head, L_crc32c_aligned;
+ Label L_crc32c_parallel, L_crc32c_parallel_loop;
+ Label L_crc32c_serial, L_crc32c_x32_loop, L_crc32c_x8, L_crc32c_x8_loop;
+ Label L_crc32c_done, L_crc32c_tail, L_crc32c_return;
+
+ set(ExternalAddress(StubRoutines::crc32c_table_addr()), table); // any incoming value of table is overwritten
+
+ cmp_and_br_short(len, 0, Assembler::lessEqual, Assembler::pn, L_crc32c_return); // nothing to do for len <= 0
+
+ // clear upper 32 bits of crc
+ clruwu(crc);
+
+ and3(buf, 7, G4); // G4 = buf % 8
+ cmp_and_brx_short(G4, 0, Assembler::equal, Assembler::pt, L_crc32c_aligned);
+
+ mov(8, G1);
+ sub(G1, G4, G4); // G4 = 8 - (buf % 8) = bytes up to the next 8-byte boundary
+
+ // ------ process the misaligned head (7 bytes or less) ------
+ bind(L_crc32c_head);
+
+ // crc = (crc >>> 8) ^ byteTable[(crc ^ b) & 0xFF];
+ ldub(buf, 0, G1);
+ update_byte_crc32(crc, G1, table);
+
+ inc(buf);
+ dec(len);
+ cmp_and_br_short(len, 0, Assembler::equal, Assembler::pn, L_crc32c_return); // input ended inside the head
+ dec(G4);
+ cmp_and_br_short(G4, 0, Assembler::greater, Assembler::pt, L_crc32c_head);
+
+ // ------ process the 8-byte-aligned body ------
+ bind(L_crc32c_aligned);
+ nop();
+ cmp_and_br_short(len, 8, Assembler::less, Assembler::pn, L_crc32c_tail); // fewer than 8 bytes left: byte-wise only, crc never leaves the INT side
+
+ // reverse the byte order of lower 32 bits to big endian, and move to FP side
+ movitof_revbytes(crc, F0, G1, G3);
+
+ set(CHUNK_LEN*8*4, G4); // 4KB = four 1KB chunks
+ cmp_and_br_short(len, G4, Assembler::less, Assembler::pt, L_crc32c_serial); // too short for a parallel block
+
+ // ------ process four 1KB chunks in parallel ------
+ bind(L_crc32c_parallel);
+
+ fzero(FloatRegisterImpl::D, F2); // accumulators for chunks 1..3 start at zero; chunk 0 continues in F0
+ fzero(FloatRegisterImpl::D, F4);
+ fzero(FloatRegisterImpl::D, F6);
+
+ mov(CHUNK_LEN - 1, G4); // the last 8 bytes of each chunk are handled after the loop
+ bind(L_crc32c_parallel_loop);
+ // schedule ldf's ahead of crc32c's to hide the load-use latency
+ ldf(FloatRegisterImpl::D, buf, 0, F8);
+ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*8, F10);
+ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*16, F12);
+ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*24, F14);
+ crc32c(F0, F8, F0);
+ crc32c(F2, F10, F2);
+ crc32c(F4, F12, F4);
+ crc32c(F6, F14, F6);
+ inc(buf, 8);
+ dec(G4);
+ cmp_and_br_short(G4, 0, Assembler::greater, Assembler::pt, L_crc32c_parallel_loop);
+
+ ldf(FloatRegisterImpl::D, buf, 0, F8); // last 8 bytes of chunks 0..2
+ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*8, F10);
+ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*16, F12);
+ crc32c(F0, F8, F0);
+ crc32c(F2, F10, F2);
+ crc32c(F4, F12, F4);
+
+ inc(buf, CHUNK_LEN*24); // buf -> last 8 bytes of chunk 3, which are not fed to crc32c here
+ ldfl(FloatRegisterImpl::D, buf, G0, F14); // load in little endian
+ inc(buf, 8); // buf -> first byte after this 4KB block
+
+ prefetch(buf, 0, Assembler::severalReads); // prefetch the start of each 1KB chunk that follows
+ prefetch(buf, CHUNK_LEN*8, Assembler::severalReads);
+ prefetch(buf, CHUNK_LEN*16, Assembler::severalReads);
+ prefetch(buf, CHUNK_LEN*24, Assembler::severalReads);
+
+ // move to INT side, and reverse the byte order of lower 32 bits to little endian
+ movftoi_revbytes(F0, O4, G1, G4);
+ movftoi_revbytes(F2, O5, G1, G4);
+ movftoi_revbytes(F4, G5, G1, G4);
+
+ // combine the results of 4 chunks
+ set64(CHUNK_K1, G3, G1);
+ xmulx(O4, G3, O4);
+ set64(CHUNK_K2, G3, G1);
+ xmulx(O5, G3, O5);
+ set64(CHUNK_K3, G3, G1);
+ xmulx(G5, G3, G5);
+
+ movdtox(F14, G4); // the held-back last 8 bytes of chunk 3
+ xor3(O4, O5, O5);
+ xor3(G5, O5, O5);
+ xor3(G4, O5, O5); // O5 = the three multiplied partial results ^ last 8 bytes of chunk 3
+
+ // reverse the byte order to big endian, via stack, and move to FP side
+ // TODO: use new revb instruction
+ add(SP, -8, G1); // NOTE(review): the scratch slot lies below SP - confirm nothing can clobber it between the stx and the ldfl
+ srlx(G1, 3, G1);
+ sllx(G1, 3, G1); // round the slot address down to a multiple of 8
+ stx(O5, G1, G0);
+ ldfl(FloatRegisterImpl::D, G1, G0, F2); // load in little endian
+
+ crc32c(F6, F2, F0); // chunk 3's accumulator absorbs the combined word; F0 is the running crc again
+
+ set(CHUNK_LEN*8*4, G4);
+ sub(len, G4, len);
+ cmp_and_br_short(len, G4, Assembler::greaterEqual, Assembler::pt, L_crc32c_parallel); // another full 4KB block remains
+ nop();
+ cmp_and_br_short(len, 0, Assembler::equal, Assembler::pt, L_crc32c_done);
+
+ bind(L_crc32c_serial);
+
+ mov(32, G4);
+ cmp_and_br_short(len, G4, Assembler::less, Assembler::pn, L_crc32c_x8);
+
+ // ------ process 32B chunks ------
+ bind(L_crc32c_x32_loop);
+ ldf(FloatRegisterImpl::D, buf, 0, F2);
+ crc32c(F0, F2, F0);
+ ldf(FloatRegisterImpl::D, buf, 8, F2);
+ crc32c(F0, F2, F0);
+ ldf(FloatRegisterImpl::D, buf, 16, F2);
+ crc32c(F0, F2, F0);
+ ldf(FloatRegisterImpl::D, buf, 24, F2);
+ inc(buf, 32);
+ crc32c(F0, F2, F0);
+ dec(len, 32);
+ cmp_and_br_short(len, G4, Assembler::greaterEqual, Assembler::pt, L_crc32c_x32_loop); // G4 is still 32
+
+ bind(L_crc32c_x8);
+ nop();
+ cmp_and_br_short(len, 8, Assembler::less, Assembler::pt, L_crc32c_done);
+
+ // ------ process 8B chunks ------
+ bind(L_crc32c_x8_loop);
+ ldf(FloatRegisterImpl::D, buf, 0, F2);
+ inc(buf, 8);
+ crc32c(F0, F2, F0);
+ dec(len, 8);
+ cmp_and_br_short(len, 8, Assembler::greaterEqual, Assembler::pt, L_crc32c_x8_loop);
+
+ bind(L_crc32c_done);
+
+ // move to INT side, and reverse the byte order of lower 32 bits to little endian
+ movftoi_revbytes(F0, crc, G1, G3);
+
+ cmp_and_br_short(len, 0, Assembler::equal, Assembler::pt, L_crc32c_return);
+
+ // ------ process the misaligned tail (7 bytes or less) ------
+ bind(L_crc32c_tail);
+
+ // crc = (crc >>> 8) ^ byteTable[(crc ^ b) & 0xFF];
+ ldub(buf, 0, G1);
+ update_byte_crc32(crc, G1, table);
+
+ inc(buf);
+ dec(len);
+ cmp_and_br_short(len, 0, Assembler::greater, Assembler::pt, L_crc32c_tail);
+
+ bind(L_crc32c_return);
+ nop();
+}
--- a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.hpp Fri Apr 29 17:24:16 2016 +0200
+++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.hpp Wed May 04 15:30:21 2016 -0700
@@ -1418,6 +1418,8 @@
// Fold 8-bit data
void fold_8bit_crc32(Register xcrc, Register table, Register xtmp, Register tmp);
void fold_8bit_crc32(Register crc, Register table, Register tmp);
+ // CRC32C code for the java.util.zip.CRC32C::updateBytes/updateDirectByteBuffer intrinsic.
+ void kernel_crc32c(Register crc, Register buf, Register len, Register table);
};
--- a/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp Fri Apr 29 17:24:16 2016 +0200
+++ b/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp Wed May 04 15:30:21 2016 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -4909,11 +4909,6 @@
return start;
}
-#define CHUNK_LEN 128 /* 128 x 8B = 1KB */
-#define CHUNK_K1 0x1307a0206 /* reverseBits(pow(x, CHUNK_LEN*8*8*3 - 32) mod P(x)) << 1 */
-#define CHUNK_K2 0x1a0f717c4 /* reverseBits(pow(x, CHUNK_LEN*8*8*2 - 32) mod P(x)) << 1 */
-#define CHUNK_K3 0x0170076fa /* reverseBits(pow(x, CHUNK_LEN*8*8*1 - 32) mod P(x)) << 1 */
-
/**
* Arguments:
*
@@ -4938,171 +4933,8 @@
const Register len = O2; // number of bytes
const Register table = O3; // byteTable
- Label L_crc32c_head, L_crc32c_aligned;
- Label L_crc32c_parallel, L_crc32c_parallel_loop;
- Label L_crc32c_serial, L_crc32c_x32_loop, L_crc32c_x8, L_crc32c_x8_loop;
- Label L_crc32c_done, L_crc32c_tail, L_crc32c_return;
-
- __ cmp_and_br_short(len, 0, Assembler::lessEqual, Assembler::pn, L_crc32c_return);
-
- // clear upper 32 bits of crc
- __ clruwu(crc);
-
- __ and3(buf, 7, G4);
- __ cmp_and_brx_short(G4, 0, Assembler::equal, Assembler::pt, L_crc32c_aligned);
-
- __ mov(8, G1);
- __ sub(G1, G4, G4);
-
- // ------ process the misaligned head (7 bytes or less) ------
- __ BIND(L_crc32c_head);
-
- // crc = (crc >>> 8) ^ byteTable[(crc ^ b) & 0xFF];
- __ ldub(buf, 0, G1);
- __ update_byte_crc32(crc, G1, table);
-
- __ inc(buf);
- __ dec(len);
- __ cmp_and_br_short(len, 0, Assembler::equal, Assembler::pn, L_crc32c_return);
- __ dec(G4);
- __ cmp_and_br_short(G4, 0, Assembler::greater, Assembler::pt, L_crc32c_head);
-
- // ------ process the 8-byte-aligned body ------
- __ BIND(L_crc32c_aligned);
- __ nop();
- __ cmp_and_br_short(len, 8, Assembler::less, Assembler::pn, L_crc32c_tail);
-
- // reverse the byte order of lower 32 bits to big endian, and move to FP side
- __ movitof_revbytes(crc, F0, G1, G3);
-
- __ set(CHUNK_LEN*8*4, G4);
- __ cmp_and_br_short(len, G4, Assembler::less, Assembler::pt, L_crc32c_serial);
-
- // ------ process four 1KB chunks in parallel ------
- __ BIND(L_crc32c_parallel);
-
- __ fzero(FloatRegisterImpl::D, F2);
- __ fzero(FloatRegisterImpl::D, F4);
- __ fzero(FloatRegisterImpl::D, F6);
-
- __ mov(CHUNK_LEN - 1, G4);
- __ BIND(L_crc32c_parallel_loop);
- // schedule ldf's ahead of crc32c's to hide the load-use latency
- __ ldf(FloatRegisterImpl::D, buf, 0, F8);
- __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*8, F10);
- __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*16, F12);
- __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*24, F14);
- __ crc32c(F0, F8, F0);
- __ crc32c(F2, F10, F2);
- __ crc32c(F4, F12, F4);
- __ crc32c(F6, F14, F6);
- __ inc(buf, 8);
- __ dec(G4);
- __ cmp_and_br_short(G4, 0, Assembler::greater, Assembler::pt, L_crc32c_parallel_loop);
-
- __ ldf(FloatRegisterImpl::D, buf, 0, F8);
- __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*8, F10);
- __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*16, F12);
- __ crc32c(F0, F8, F0);
- __ crc32c(F2, F10, F2);
- __ crc32c(F4, F12, F4);
-
- __ inc(buf, CHUNK_LEN*24);
- __ ldfl(FloatRegisterImpl::D, buf, G0, F14); // load in little endian
- __ inc(buf, 8);
-
- __ prefetch(buf, 0, Assembler::severalReads);
- __ prefetch(buf, CHUNK_LEN*8, Assembler::severalReads);
- __ prefetch(buf, CHUNK_LEN*16, Assembler::severalReads);
- __ prefetch(buf, CHUNK_LEN*24, Assembler::severalReads);
-
- // move to INT side, and reverse the byte order of lower 32 bits to little endian
- __ movftoi_revbytes(F0, O4, G1, G4);
- __ movftoi_revbytes(F2, O5, G1, G4);
- __ movftoi_revbytes(F4, G5, G1, G4);
-
- // combine the results of 4 chunks
- __ set64(CHUNK_K1, G3, G1);
- __ xmulx(O4, G3, O4);
- __ set64(CHUNK_K2, G3, G1);
- __ xmulx(O5, G3, O5);
- __ set64(CHUNK_K3, G3, G1);
- __ xmulx(G5, G3, G5);
-
- __ movdtox(F14, G4);
- __ xor3(O4, O5, O5);
- __ xor3(G5, O5, O5);
- __ xor3(G4, O5, O5);
-
- // reverse the byte order to big endian, via stack, and move to FP side
- __ add(SP, -8, G1);
- __ srlx(G1, 3, G1);
- __ sllx(G1, 3, G1);
- __ stx(O5, G1, G0);
- __ ldfl(FloatRegisterImpl::D, G1, G0, F2); // load in little endian
-
- __ crc32c(F6, F2, F0);
-
- __ set(CHUNK_LEN*8*4, G4);
- __ sub(len, G4, len);
- __ cmp_and_br_short(len, G4, Assembler::greaterEqual, Assembler::pt, L_crc32c_parallel);
- __ nop();
- __ cmp_and_br_short(len, 0, Assembler::equal, Assembler::pt, L_crc32c_done);
-
- __ BIND(L_crc32c_serial);
-
- __ mov(32, G4);
- __ cmp_and_br_short(len, G4, Assembler::less, Assembler::pn, L_crc32c_x8);
-
- // ------ process 32B chunks ------
- __ BIND(L_crc32c_x32_loop);
- __ ldf(FloatRegisterImpl::D, buf, 0, F2);
- __ inc(buf, 8);
- __ crc32c(F0, F2, F0);
- __ ldf(FloatRegisterImpl::D, buf, 0, F2);
- __ inc(buf, 8);
- __ crc32c(F0, F2, F0);
- __ ldf(FloatRegisterImpl::D, buf, 0, F2);
- __ inc(buf, 8);
- __ crc32c(F0, F2, F0);
- __ ldf(FloatRegisterImpl::D, buf, 0, F2);
- __ inc(buf, 8);
- __ crc32c(F0, F2, F0);
- __ dec(len, 32);
- __ cmp_and_br_short(len, G4, Assembler::greaterEqual, Assembler::pt, L_crc32c_x32_loop);
-
- __ BIND(L_crc32c_x8);
- __ nop();
- __ cmp_and_br_short(len, 8, Assembler::less, Assembler::pt, L_crc32c_done);
-
- // ------ process 8B chunks ------
- __ BIND(L_crc32c_x8_loop);
- __ ldf(FloatRegisterImpl::D, buf, 0, F2);
- __ inc(buf, 8);
- __ crc32c(F0, F2, F0);
- __ dec(len, 8);
- __ cmp_and_br_short(len, 8, Assembler::greaterEqual, Assembler::pt, L_crc32c_x8_loop);
-
- __ BIND(L_crc32c_done);
-
- // move to INT side, and reverse the byte order of lower 32 bits to little endian
- __ movftoi_revbytes(F0, crc, G1, G3);
-
- __ cmp_and_br_short(len, 0, Assembler::equal, Assembler::pt, L_crc32c_return);
-
- // ------ process the misaligned tail (7 bytes or less) ------
- __ BIND(L_crc32c_tail);
-
- // crc = (crc >>> 8) ^ byteTable[(crc ^ b) & 0xFF];
- __ ldub(buf, 0, G1);
- __ update_byte_crc32(crc, G1, table);
-
- __ inc(buf);
- __ dec(len);
- __ cmp_and_br_short(len, 0, Assembler::greater, Assembler::pt, L_crc32c_tail);
-
- __ BIND(L_crc32c_return);
- __ nop();
+ __ kernel_crc32c(crc, buf, len, table);
+
__ retl();
__ delayed()->nop();
@@ -5366,6 +5198,12 @@
StubRoutines::_crc_table_adr = (address)StubRoutines::Sparc::_crc_table;
StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
}
+
+ if (UseCRC32CIntrinsics) {
+ // set the table address before generating the stub that uses it
+ StubRoutines::_crc32c_table_addr = (address)StubRoutines::Sparc::_crc32c_table;
+ StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C();
+ }
}
@@ -5425,12 +5263,6 @@
StubRoutines::_sha512_implCompress = generate_sha512_implCompress(false, "sha512_implCompress");
StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true, "sha512_implCompressMB");
}
-
- // generate CRC32C intrinsic code
- if (UseCRC32CIntrinsics) {
- StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C();
- }
-
// generate Adler32 intrinsics code
if (UseAdler32Intrinsics) {
StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32();
--- a/hotspot/src/cpu/sparc/vm/stubRoutines_sparc.cpp Fri Apr 29 17:24:16 2016 +0200
+++ b/hotspot/src/cpu/sparc/vm/stubRoutines_sparc.cpp Wed May 04 15:30:21 2016 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -147,3 +147,62 @@
0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
0x2d02ef8dUL
};
+
+/**
+ * CRC32C byte-wise lookup table (256 entries); published through StubRoutines::_crc32c_table_addr when UseCRC32CIntrinsics is set
+ */
+juint StubRoutines::Sparc::_crc32c_table[] =
+{
+ 0x00000000UL, 0xF26B8303UL, 0xE13B70F7UL, 0x1350F3F4UL, 0xC79A971FUL,
+ 0x35F1141CUL, 0x26A1E7E8UL, 0xD4CA64EBUL, 0x8AD958CFUL, 0x78B2DBCCUL,
+ 0x6BE22838UL, 0x9989AB3BUL, 0x4D43CFD0UL, 0xBF284CD3UL, 0xAC78BF27UL,
+ 0x5E133C24UL, 0x105EC76FUL, 0xE235446CUL, 0xF165B798UL, 0x030E349BUL,
+ 0xD7C45070UL, 0x25AFD373UL, 0x36FF2087UL, 0xC494A384UL, 0x9A879FA0UL,
+ 0x68EC1CA3UL, 0x7BBCEF57UL, 0x89D76C54UL, 0x5D1D08BFUL, 0xAF768BBCUL,
+ 0xBC267848UL, 0x4E4DFB4BUL, 0x20BD8EDEUL, 0xD2D60DDDUL, 0xC186FE29UL,
+ 0x33ED7D2AUL, 0xE72719C1UL, 0x154C9AC2UL, 0x061C6936UL, 0xF477EA35UL,
+ 0xAA64D611UL, 0x580F5512UL, 0x4B5FA6E6UL, 0xB93425E5UL, 0x6DFE410EUL,
+ 0x9F95C20DUL, 0x8CC531F9UL, 0x7EAEB2FAUL, 0x30E349B1UL, 0xC288CAB2UL,
+ 0xD1D83946UL, 0x23B3BA45UL, 0xF779DEAEUL, 0x05125DADUL, 0x1642AE59UL,
+ 0xE4292D5AUL, 0xBA3A117EUL, 0x4851927DUL, 0x5B016189UL, 0xA96AE28AUL,
+ 0x7DA08661UL, 0x8FCB0562UL, 0x9C9BF696UL, 0x6EF07595UL, 0x417B1DBCUL,
+ 0xB3109EBFUL, 0xA0406D4BUL, 0x522BEE48UL, 0x86E18AA3UL, 0x748A09A0UL,
+ 0x67DAFA54UL, 0x95B17957UL, 0xCBA24573UL, 0x39C9C670UL, 0x2A993584UL,
+ 0xD8F2B687UL, 0x0C38D26CUL, 0xFE53516FUL, 0xED03A29BUL, 0x1F682198UL,
+ 0x5125DAD3UL, 0xA34E59D0UL, 0xB01EAA24UL, 0x42752927UL, 0x96BF4DCCUL,
+ 0x64D4CECFUL, 0x77843D3BUL, 0x85EFBE38UL, 0xDBFC821CUL, 0x2997011FUL,
+ 0x3AC7F2EBUL, 0xC8AC71E8UL, 0x1C661503UL, 0xEE0D9600UL, 0xFD5D65F4UL,
+ 0x0F36E6F7UL, 0x61C69362UL, 0x93AD1061UL, 0x80FDE395UL, 0x72966096UL,
+ 0xA65C047DUL, 0x5437877EUL, 0x4767748AUL, 0xB50CF789UL, 0xEB1FCBADUL,
+ 0x197448AEUL, 0x0A24BB5AUL, 0xF84F3859UL, 0x2C855CB2UL, 0xDEEEDFB1UL,
+ 0xCDBE2C45UL, 0x3FD5AF46UL, 0x7198540DUL, 0x83F3D70EUL, 0x90A324FAUL,
+ 0x62C8A7F9UL, 0xB602C312UL, 0x44694011UL, 0x5739B3E5UL, 0xA55230E6UL,
+ 0xFB410CC2UL, 0x092A8FC1UL, 0x1A7A7C35UL, 0xE811FF36UL, 0x3CDB9BDDUL,
+ 0xCEB018DEUL, 0xDDE0EB2AUL, 0x2F8B6829UL, 0x82F63B78UL, 0x709DB87BUL,
+ 0x63CD4B8FUL, 0x91A6C88CUL, 0x456CAC67UL, 0xB7072F64UL, 0xA457DC90UL,
+ 0x563C5F93UL, 0x082F63B7UL, 0xFA44E0B4UL, 0xE9141340UL, 0x1B7F9043UL,
+ 0xCFB5F4A8UL, 0x3DDE77ABUL, 0x2E8E845FUL, 0xDCE5075CUL, 0x92A8FC17UL,
+ 0x60C37F14UL, 0x73938CE0UL, 0x81F80FE3UL, 0x55326B08UL, 0xA759E80BUL,
+ 0xB4091BFFUL, 0x466298FCUL, 0x1871A4D8UL, 0xEA1A27DBUL, 0xF94AD42FUL,
+ 0x0B21572CUL, 0xDFEB33C7UL, 0x2D80B0C4UL, 0x3ED04330UL, 0xCCBBC033UL,
+ 0xA24BB5A6UL, 0x502036A5UL, 0x4370C551UL, 0xB11B4652UL, 0x65D122B9UL,
+ 0x97BAA1BAUL, 0x84EA524EUL, 0x7681D14DUL, 0x2892ED69UL, 0xDAF96E6AUL,
+ 0xC9A99D9EUL, 0x3BC21E9DUL, 0xEF087A76UL, 0x1D63F975UL, 0x0E330A81UL,
+ 0xFC588982UL, 0xB21572C9UL, 0x407EF1CAUL, 0x532E023EUL, 0xA145813DUL,
+ 0x758FE5D6UL, 0x87E466D5UL, 0x94B49521UL, 0x66DF1622UL, 0x38CC2A06UL,
+ 0xCAA7A905UL, 0xD9F75AF1UL, 0x2B9CD9F2UL, 0xFF56BD19UL, 0x0D3D3E1AUL,
+ 0x1E6DCDEEUL, 0xEC064EEDUL, 0xC38D26C4UL, 0x31E6A5C7UL, 0x22B65633UL,
+ 0xD0DDD530UL, 0x0417B1DBUL, 0xF67C32D8UL, 0xE52CC12CUL, 0x1747422FUL,
+ 0x49547E0BUL, 0xBB3FFD08UL, 0xA86F0EFCUL, 0x5A048DFFUL, 0x8ECEE914UL,
+ 0x7CA56A17UL, 0x6FF599E3UL, 0x9D9E1AE0UL, 0xD3D3E1ABUL, 0x21B862A8UL,
+ 0x32E8915CUL, 0xC083125FUL, 0x144976B4UL, 0xE622F5B7UL, 0xF5720643UL,
+ 0x07198540UL, 0x590AB964UL, 0xAB613A67UL, 0xB831C993UL, 0x4A5A4A90UL,
+ 0x9E902E7BUL, 0x6CFBAD78UL, 0x7FAB5E8CUL, 0x8DC0DD8FUL, 0xE330A81AUL,
+ 0x115B2B19UL, 0x020BD8EDUL, 0xF0605BEEUL, 0x24AA3F05UL, 0xD6C1BC06UL,
+ 0xC5914FF2UL, 0x37FACCF1UL, 0x69E9F0D5UL, 0x9B8273D6UL, 0x88D28022UL,
+ 0x7AB90321UL, 0xAE7367CAUL, 0x5C18E4C9UL, 0x4F48173DUL, 0xBD23943EUL,
+ 0xF36E6F75UL, 0x0105EC76UL, 0x12551F82UL, 0xE03E9C81UL, 0x34F4F86AUL,
+ 0xC69F7B69UL, 0xD5CF889DUL, 0x27A40B9EUL, 0x79B737BAUL, 0x8BDCB4B9UL,
+ 0x988C474DUL, 0x6AE7C44EUL, 0xBE2DA0A5UL, 0x4C4623A6UL, 0x5F16D052UL,
+ 0xAD7D5351UL
+};
--- a/hotspot/src/cpu/sparc/vm/stubRoutines_sparc.hpp Fri Apr 29 17:24:16 2016 +0200
+++ b/hotspot/src/cpu/sparc/vm/stubRoutines_sparc.hpp Wed May 04 15:30:21 2016 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -56,6 +56,7 @@
// masks and table for CRC32
static uint64_t _crc_by128_masks[];
static juint _crc_table[];
+ static juint _crc32c_table[];
public:
// test assembler stop routine by setting registers
--- a/hotspot/src/cpu/sparc/vm/templateInterpreterGenerator_sparc.cpp Fri Apr 29 17:24:16 2016 +0200
+++ b/hotspot/src/cpu/sparc/vm/templateInterpreterGenerator_sparc.cpp Wed May 04 15:30:21 2016 -0700
@@ -1082,8 +1082,56 @@
return NULL;
}
-// Not supported
+/**
+ * Method entry for intrinsic-candidate (non-native) methods (NULL is returned when UseCRC32CIntrinsics is off):
+ * int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end)
+ * int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long buf, int off, int end)
+ * Unlike CRC32, CRC32C does not have any methods marked as native.
+ * CRC32C also passes an exclusive "end" index instead of the length that CRC32 uses.
+ */
address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
+
+ if (UseCRC32CIntrinsics) {
+ address entry = __ pc();
+
+ // Load parameters from the stack
+ const Register crc = O0; // initial crc
+ const Register buf = O1; // byte[] reference, or the long address for updateDirectByteBuffer
+ const Register offset = O2; // start offset into buf
+ const Register end = O3; // exclusive end offset (len = end - offset)
+ const Register len = O2; // len argument to the kernel (shares O2 with offset)
+ const Register table = O3; // crc32c lookup table address (shares O3 with end; loaded by kernel_crc32c)
+
+ // Arguments are reversed on java expression stack
+ // Calculate address of start element
+ if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) {
+ __ lduw(Gargs, 0, end);
+ __ lduw(Gargs, 8, offset);
+ __ ldx( Gargs, 16, buf);
+ __ lduw(Gargs, 32, crc); // the long buf occupies two stack slots, so crc sits one slot further than in the byte[] case
+ __ add(buf, offset, buf);
+ __ sub(end, offset, len); // must follow the add above: writing len overwrites offset
+ } else {
+ __ lduw(Gargs, 0, end);
+ __ lduw(Gargs, 8, offset);
+ __ ldx( Gargs, 16, buf);
+ __ lduw(Gargs, 24, crc);
+ __ add(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE), buf); // account for the header size
+ __ add(buf, offset, buf);
+ __ sub(end, offset, len); // must follow the add above: writing len overwrites offset
+ }
+
+ // Call the crc32c kernel
+ __ MacroAssembler::save_thread(L7_thread_cache);
+ __ kernel_crc32c(crc, buf, len, table); // emitted inline here; the incoming value of table (end) is no longer needed
+ __ MacroAssembler::restore_thread(L7_thread_cache);
+
+ // result in O0
+ __ retl();
+ __ delayed()->nop();
+
+ return entry;
+ }
return NULL;
}
--- a/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp Fri Apr 29 17:24:16 2016 +0200
+++ b/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp Wed May 04 15:30:21 2016 -0700
@@ -1108,6 +1108,10 @@
}
}
+void LIRGenerator::do_update_CRC32C(Intrinsic* x) { // C1 CRC32C intrinsic hook: no x86 implementation in this change
+ Unimplemented(); // c1_Compiler.cpp only lists the CRC32C intrinsics under #ifdef SPARC
+}
+
// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
// _i2b, _i2c, _i2s
LIR_Opr fixed_register_for(BasicType type) {
--- a/hotspot/src/share/vm/c1/c1_Compiler.cpp Fri Apr 29 17:24:16 2016 +0200
+++ b/hotspot/src/share/vm/c1/c1_Compiler.cpp Wed May 04 15:30:21 2016 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -224,6 +224,10 @@
case vmIntrinsics::_updateCRC32:
case vmIntrinsics::_updateBytesCRC32:
case vmIntrinsics::_updateByteBufferCRC32:
+#ifdef SPARC
+ case vmIntrinsics::_updateBytesCRC32C:
+ case vmIntrinsics::_updateDirectByteBufferCRC32C:
+#endif
case vmIntrinsics::_compareAndSwapInt:
case vmIntrinsics::_compareAndSwapObject:
case vmIntrinsics::_getCharStringU:
--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp Fri Apr 29 17:24:16 2016 +0200
+++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp Wed May 04 15:30:21 2016 -0700
@@ -3174,6 +3174,11 @@
do_update_CRC32(x);
break;
+ case vmIntrinsics::_updateBytesCRC32C:
+ case vmIntrinsics::_updateDirectByteBufferCRC32C:
+ do_update_CRC32C(x);
+ break;
+
default: ShouldNotReachHere(); break;
}
}
--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp Fri Apr 29 17:24:16 2016 +0200
+++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp Wed May 04 15:30:21 2016 -0700
@@ -253,6 +253,7 @@
void do_FPIntrinsics(Intrinsic* x);
void do_Reference_get(Intrinsic* x);
void do_update_CRC32(Intrinsic* x);
+ void do_update_CRC32C(Intrinsic* x);
LIR_Opr call_runtime(BasicTypeArray* signature, LIRItemList* args, address entry, ValueType* result_type, CodeEmitInfo* info);
LIR_Opr call_runtime(BasicTypeArray* signature, LIR_OprList* args, address entry, ValueType* result_type, CodeEmitInfo* info);
--- a/hotspot/src/share/vm/c1/c1_Runtime1.cpp Fri Apr 29 17:24:16 2016 +0200
+++ b/hotspot/src/share/vm/c1/c1_Runtime1.cpp Wed May 04 15:30:21 2016 -0700
@@ -318,6 +318,7 @@
FUNCTION_CASE(entry, TRACE_TIME_METHOD);
#endif
FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32());
+ FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32C());
FUNCTION_CASE(entry, StubRoutines::dexp());
FUNCTION_CASE(entry, StubRoutines::dlog());
FUNCTION_CASE(entry, StubRoutines::dlog10());