# HG changeset patch # User kvn # Date 1462401021 25200 # Node ID d972e3a2df532d9f99780164547f26b00fa2a6e5 # Parent 510f77046e00987908140bb95314532858f11728 8155162: java.util.zip.CRC32C Interpreter/C1 intrinsics support on SPARC Reviewed-by: kvn Contributed-by: ahmed.khawaja@oracle.com diff -r 510f77046e00 -r d972e3a2df53 hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp --- a/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp Fri Apr 29 17:24:16 2016 +0200 +++ b/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp Wed May 04 15:30:21 2016 -0700 @@ -1029,6 +1029,10 @@ } } +void LIRGenerator::do_update_CRC32C(Intrinsic* x) { + Unimplemented(); +} + // _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f // _i2b, _i2c, _i2s void LIRGenerator::do_Convert(Convert* x) { diff -r 510f77046e00 -r d972e3a2df53 hotspot/src/cpu/ppc/vm/c1_LIRGenerator_ppc.cpp --- a/hotspot/src/cpu/ppc/vm/c1_LIRGenerator_ppc.cpp Fri Apr 29 17:24:16 2016 +0200 +++ b/hotspot/src/cpu/ppc/vm/c1_LIRGenerator_ppc.cpp Wed May 04 15:30:21 2016 -0700 @@ -1427,3 +1427,7 @@ } } } + +void LIRGenerator::do_update_CRC32C(Intrinsic* x) { + Unimplemented(); +} diff -r 510f77046e00 -r d972e3a2df53 hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp --- a/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp Fri Apr 29 17:24:16 2016 +0200 +++ b/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp Wed May 04 15:30:21 2016 -0700 @@ -868,6 +868,90 @@ } } +void LIRGenerator::do_update_CRC32C(Intrinsic* x) { + // Obtain the result operand first (this function makes no state_for calls) + LIR_Opr result = rlock_result(x); + int flags = 0; + switch (x->id()) { + case vmIntrinsics::_updateBytesCRC32C: + case vmIntrinsics::_updateDirectByteBufferCRC32C: { + + bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32C); + int array_offset = is_updateBytes ? 
arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0; + + LIRItem crc(x->argument_at(0), this); + LIRItem buf(x->argument_at(1), this); + LIRItem off(x->argument_at(2), this); + LIRItem end(x->argument_at(3), this); + + buf.load_item(); + off.load_nonconstant(); + end.load_nonconstant(); + + // len = end - off + LIR_Opr len = end.result(); + LIR_Opr tmpA = new_register(T_INT); + LIR_Opr tmpB = new_register(T_INT); + __ move(end.result(), tmpA); + __ move(off.result(), tmpB); + __ sub(tmpA, tmpB, tmpA); + len = tmpA; + + LIR_Opr index = off.result(); + + if(off.result()->is_constant()) { + index = LIR_OprFact::illegalOpr; + array_offset += off.result()->as_jint(); + } + + LIR_Opr base_op = buf.result(); + + if (index->is_valid()) { + LIR_Opr tmp = new_register(T_LONG); + __ convert(Bytecodes::_i2l, index, tmp); + index = tmp; + if (index->is_constant()) { + array_offset += index->as_constant_ptr()->as_jint(); + index = LIR_OprFact::illegalOpr; + } else if (index->is_register()) { + LIR_Opr tmp2 = new_register(T_LONG); + LIR_Opr tmp3 = new_register(T_LONG); + __ move(base_op, tmp2); + __ move(index, tmp3); + __ add(tmp2, tmp3, tmp2); + base_op = tmp2; + } else { + ShouldNotReachHere(); + } + } + + LIR_Address* a = new LIR_Address(base_op, array_offset, T_BYTE); + + BasicTypeList signature(3); + signature.append(T_INT); + signature.append(T_ADDRESS); + signature.append(T_INT); + CallingConvention* cc = frame_map()->c_calling_convention(&signature); + const LIR_Opr result_reg = result_register_for(x->type()); + + LIR_Opr addr = new_pointer_register(); + __ leal(LIR_OprFact::address(a), addr); + + crc.load_item_force(cc->at(0)); + __ move(addr, cc->at(1)); + __ move(len, cc->at(2)); + + __ call_runtime_leaf(StubRoutines::updateBytesCRC32C(), getThreadTemp(), result_reg, cc->args()); + __ move(result_reg, result); + + break; + } + default: { + ShouldNotReachHere(); + } + } +} + // _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f // _i2b, _i2c, _i2s void 
LIRGenerator::do_Convert(Convert* x) { diff -r 510f77046e00 -r d972e3a2df53 hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp --- a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp Fri Apr 29 17:24:16 2016 +0200 +++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp Wed May 04 15:30:21 2016 -0700 @@ -4837,21 +4837,21 @@ // Reverse byte order of lower 32 bits, assuming upper 32 bits all zeros void MacroAssembler::reverse_bytes_32(Register src, Register dst, Register tmp) { - srlx(src, 24, dst); - - sllx(src, 32+8, tmp); - srlx(tmp, 32+24, tmp); - sllx(tmp, 8, tmp); - or3(dst, tmp, dst); - - sllx(src, 32+16, tmp); - srlx(tmp, 32+24, tmp); - sllx(tmp, 16, tmp); - or3(dst, tmp, dst); - - sllx(src, 32+24, tmp); - srlx(tmp, 32, tmp); - or3(dst, tmp, dst); + srlx(src, 24, dst); + + sllx(src, 32+8, tmp); + srlx(tmp, 32+24, tmp); + sllx(tmp, 8, tmp); + or3(dst, tmp, dst); + + sllx(src, 32+16, tmp); + srlx(tmp, 32+24, tmp); + sllx(tmp, 16, tmp); + or3(dst, tmp, dst); + + sllx(src, 32+24, tmp); + srlx(tmp, 32, tmp); + or3(dst, tmp, dst); } void MacroAssembler::movitof_revbytes(Register src, FloatRegister dst, Register tmp1, Register tmp2) { @@ -5103,3 +5103,176 @@ not1(crc); } +#define CHUNK_LEN 128 /* 128 x 8B = 1KB */ +#define CHUNK_K1 0x1307a0206 /* reverseBits(pow(x, CHUNK_LEN*8*8*3 - 32) mod P(x)) << 1 */ +#define CHUNK_K2 0x1a0f717c4 /* reverseBits(pow(x, CHUNK_LEN*8*8*2 - 32) mod P(x)) << 1 */ +#define CHUNK_K3 0x0170076fa /* reverseBits(pow(x, CHUNK_LEN*8*8*1 - 32) mod P(x)) << 1 */ + +void MacroAssembler::kernel_crc32c(Register crc, Register buf, Register len, Register table) { + + Label L_crc32c_head, L_crc32c_aligned; + Label L_crc32c_parallel, L_crc32c_parallel_loop; + Label L_crc32c_serial, L_crc32c_x32_loop, L_crc32c_x8, L_crc32c_x8_loop; + Label L_crc32c_done, L_crc32c_tail, L_crc32c_return; + + set(ExternalAddress(StubRoutines::crc32c_table_addr()), table); + + cmp_and_br_short(len, 0, Assembler::lessEqual, Assembler::pn, L_crc32c_return); + + // clear upper 
32 bits of crc + clruwu(crc); + + and3(buf, 7, G4); + cmp_and_brx_short(G4, 0, Assembler::equal, Assembler::pt, L_crc32c_aligned); + + mov(8, G1); + sub(G1, G4, G4); + + // ------ process the misaligned head (7 bytes or less) ------ + bind(L_crc32c_head); + + // crc = (crc >>> 8) ^ byteTable[(crc ^ b) & 0xFF]; + ldub(buf, 0, G1); + update_byte_crc32(crc, G1, table); + + inc(buf); + dec(len); + cmp_and_br_short(len, 0, Assembler::equal, Assembler::pn, L_crc32c_return); + dec(G4); + cmp_and_br_short(G4, 0, Assembler::greater, Assembler::pt, L_crc32c_head); + + // ------ process the 8-byte-aligned body ------ + bind(L_crc32c_aligned); + nop(); + cmp_and_br_short(len, 8, Assembler::less, Assembler::pn, L_crc32c_tail); + + // reverse the byte order of lower 32 bits to big endian, and move to FP side + movitof_revbytes(crc, F0, G1, G3); + + set(CHUNK_LEN*8*4, G4); + cmp_and_br_short(len, G4, Assembler::less, Assembler::pt, L_crc32c_serial); + + // ------ process four 1KB chunks in parallel ------ + bind(L_crc32c_parallel); + + fzero(FloatRegisterImpl::D, F2); + fzero(FloatRegisterImpl::D, F4); + fzero(FloatRegisterImpl::D, F6); + + mov(CHUNK_LEN - 1, G4); + bind(L_crc32c_parallel_loop); + // schedule ldf's ahead of crc32c's to hide the load-use latency + ldf(FloatRegisterImpl::D, buf, 0, F8); + ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*8, F10); + ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*16, F12); + ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*24, F14); + crc32c(F0, F8, F0); + crc32c(F2, F10, F2); + crc32c(F4, F12, F4); + crc32c(F6, F14, F6); + inc(buf, 8); + dec(G4); + cmp_and_br_short(G4, 0, Assembler::greater, Assembler::pt, L_crc32c_parallel_loop); + + ldf(FloatRegisterImpl::D, buf, 0, F8); + ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*8, F10); + ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*16, F12); + crc32c(F0, F8, F0); + crc32c(F2, F10, F2); + crc32c(F4, F12, F4); + + inc(buf, CHUNK_LEN*24); + ldfl(FloatRegisterImpl::D, buf, G0, F14); // load in little endian + inc(buf, 8); + 
+ prefetch(buf, 0, Assembler::severalReads); + prefetch(buf, CHUNK_LEN*8, Assembler::severalReads); + prefetch(buf, CHUNK_LEN*16, Assembler::severalReads); + prefetch(buf, CHUNK_LEN*24, Assembler::severalReads); + + // move to INT side, and reverse the byte order of lower 32 bits to little endian + movftoi_revbytes(F0, O4, G1, G4); + movftoi_revbytes(F2, O5, G1, G4); + movftoi_revbytes(F4, G5, G1, G4); + + // combine the results of 4 chunks + set64(CHUNK_K1, G3, G1); + xmulx(O4, G3, O4); + set64(CHUNK_K2, G3, G1); + xmulx(O5, G3, O5); + set64(CHUNK_K3, G3, G1); + xmulx(G5, G3, G5); + + movdtox(F14, G4); + xor3(O4, O5, O5); + xor3(G5, O5, O5); + xor3(G4, O5, O5); + + // reverse the byte order to big endian, via stack, and move to FP side + // TODO: use new revb instruction + add(SP, -8, G1); + srlx(G1, 3, G1); + sllx(G1, 3, G1); + stx(O5, G1, G0); + ldfl(FloatRegisterImpl::D, G1, G0, F2); // load in little endian + + crc32c(F6, F2, F0); + + set(CHUNK_LEN*8*4, G4); + sub(len, G4, len); + cmp_and_br_short(len, G4, Assembler::greaterEqual, Assembler::pt, L_crc32c_parallel); + nop(); + cmp_and_br_short(len, 0, Assembler::equal, Assembler::pt, L_crc32c_done); + + bind(L_crc32c_serial); + + mov(32, G4); + cmp_and_br_short(len, G4, Assembler::less, Assembler::pn, L_crc32c_x8); + + // ------ process 32B chunks ------ + bind(L_crc32c_x32_loop); + ldf(FloatRegisterImpl::D, buf, 0, F2); + crc32c(F0, F2, F0); + ldf(FloatRegisterImpl::D, buf, 8, F2); + crc32c(F0, F2, F0); + ldf(FloatRegisterImpl::D, buf, 16, F2); + crc32c(F0, F2, F0); + ldf(FloatRegisterImpl::D, buf, 24, F2); + inc(buf, 32); + crc32c(F0, F2, F0); + dec(len, 32); + cmp_and_br_short(len, G4, Assembler::greaterEqual, Assembler::pt, L_crc32c_x32_loop); + + bind(L_crc32c_x8); + nop(); + cmp_and_br_short(len, 8, Assembler::less, Assembler::pt, L_crc32c_done); + + // ------ process 8B chunks ------ + bind(L_crc32c_x8_loop); + ldf(FloatRegisterImpl::D, buf, 0, F2); + inc(buf, 8); + crc32c(F0, F2, F0); + dec(len, 8); + 
cmp_and_br_short(len, 8, Assembler::greaterEqual, Assembler::pt, L_crc32c_x8_loop); + + bind(L_crc32c_done); + + // move to INT side, and reverse the byte order of lower 32 bits to little endian + movftoi_revbytes(F0, crc, G1, G3); + + cmp_and_br_short(len, 0, Assembler::equal, Assembler::pt, L_crc32c_return); + + // ------ process the misaligned tail (7 bytes or less) ------ + bind(L_crc32c_tail); + + // crc = (crc >>> 8) ^ byteTable[(crc ^ b) & 0xFF]; + ldub(buf, 0, G1); + update_byte_crc32(crc, G1, table); + + inc(buf); + dec(len); + cmp_and_br_short(len, 0, Assembler::greater, Assembler::pt, L_crc32c_tail); + + bind(L_crc32c_return); + nop(); +} diff -r 510f77046e00 -r d972e3a2df53 hotspot/src/cpu/sparc/vm/macroAssembler_sparc.hpp --- a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.hpp Fri Apr 29 17:24:16 2016 +0200 +++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.hpp Wed May 04 15:30:21 2016 -0700 @@ -1418,6 +1418,8 @@ // Fold 8-bit data void fold_8bit_crc32(Register xcrc, Register table, Register xtmp, Register tmp); void fold_8bit_crc32(Register crc, Register table, Register tmp); + // CRC32C code for java.util.zip.CRC32C::updateBytes/updateDirectByteBuffer intrinsic. + void kernel_crc32c(Register crc, Register buf, Register len, Register table); }; diff -r 510f77046e00 -r d972e3a2df53 hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp --- a/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp Fri Apr 29 17:24:16 2016 +0200 +++ b/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp Wed May 04 15:30:21 2016 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -4909,11 +4909,6 @@ return start; } -#define CHUNK_LEN 128 /* 128 x 8B = 1KB */ -#define CHUNK_K1 0x1307a0206 /* reverseBits(pow(x, CHUNK_LEN*8*8*3 - 32) mod P(x)) << 1 */ -#define CHUNK_K2 0x1a0f717c4 /* reverseBits(pow(x, CHUNK_LEN*8*8*2 - 32) mod P(x)) << 1 */ -#define CHUNK_K3 0x0170076fa /* reverseBits(pow(x, CHUNK_LEN*8*8*1 - 32) mod P(x)) << 1 */ - /** * Arguments: * @@ -4938,171 +4933,8 @@ const Register len = O2; // number of bytes const Register table = O3; // byteTable - Label L_crc32c_head, L_crc32c_aligned; - Label L_crc32c_parallel, L_crc32c_parallel_loop; - Label L_crc32c_serial, L_crc32c_x32_loop, L_crc32c_x8, L_crc32c_x8_loop; - Label L_crc32c_done, L_crc32c_tail, L_crc32c_return; - - __ cmp_and_br_short(len, 0, Assembler::lessEqual, Assembler::pn, L_crc32c_return); - - // clear upper 32 bits of crc - __ clruwu(crc); - - __ and3(buf, 7, G4); - __ cmp_and_brx_short(G4, 0, Assembler::equal, Assembler::pt, L_crc32c_aligned); - - __ mov(8, G1); - __ sub(G1, G4, G4); - - // ------ process the misaligned head (7 bytes or less) ------ - __ BIND(L_crc32c_head); - - // crc = (crc >>> 8) ^ byteTable[(crc ^ b) & 0xFF]; - __ ldub(buf, 0, G1); - __ update_byte_crc32(crc, G1, table); - - __ inc(buf); - __ dec(len); - __ cmp_and_br_short(len, 0, Assembler::equal, Assembler::pn, L_crc32c_return); - __ dec(G4); - __ cmp_and_br_short(G4, 0, Assembler::greater, Assembler::pt, L_crc32c_head); - - // ------ process the 8-byte-aligned body ------ - __ BIND(L_crc32c_aligned); - __ nop(); - __ cmp_and_br_short(len, 8, Assembler::less, Assembler::pn, L_crc32c_tail); - - // reverse the byte order of lower 32 bits to big endian, and move to FP side - __ movitof_revbytes(crc, F0, G1, G3); - - __ set(CHUNK_LEN*8*4, G4); - __ cmp_and_br_short(len, G4, Assembler::less, Assembler::pt, L_crc32c_serial); - - // ------ process four 1KB chunks in parallel ------ - __ BIND(L_crc32c_parallel); - - __ 
fzero(FloatRegisterImpl::D, F2); - __ fzero(FloatRegisterImpl::D, F4); - __ fzero(FloatRegisterImpl::D, F6); - - __ mov(CHUNK_LEN - 1, G4); - __ BIND(L_crc32c_parallel_loop); - // schedule ldf's ahead of crc32c's to hide the load-use latency - __ ldf(FloatRegisterImpl::D, buf, 0, F8); - __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*8, F10); - __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*16, F12); - __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*24, F14); - __ crc32c(F0, F8, F0); - __ crc32c(F2, F10, F2); - __ crc32c(F4, F12, F4); - __ crc32c(F6, F14, F6); - __ inc(buf, 8); - __ dec(G4); - __ cmp_and_br_short(G4, 0, Assembler::greater, Assembler::pt, L_crc32c_parallel_loop); - - __ ldf(FloatRegisterImpl::D, buf, 0, F8); - __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*8, F10); - __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*16, F12); - __ crc32c(F0, F8, F0); - __ crc32c(F2, F10, F2); - __ crc32c(F4, F12, F4); - - __ inc(buf, CHUNK_LEN*24); - __ ldfl(FloatRegisterImpl::D, buf, G0, F14); // load in little endian - __ inc(buf, 8); - - __ prefetch(buf, 0, Assembler::severalReads); - __ prefetch(buf, CHUNK_LEN*8, Assembler::severalReads); - __ prefetch(buf, CHUNK_LEN*16, Assembler::severalReads); - __ prefetch(buf, CHUNK_LEN*24, Assembler::severalReads); - - // move to INT side, and reverse the byte order of lower 32 bits to little endian - __ movftoi_revbytes(F0, O4, G1, G4); - __ movftoi_revbytes(F2, O5, G1, G4); - __ movftoi_revbytes(F4, G5, G1, G4); - - // combine the results of 4 chunks - __ set64(CHUNK_K1, G3, G1); - __ xmulx(O4, G3, O4); - __ set64(CHUNK_K2, G3, G1); - __ xmulx(O5, G3, O5); - __ set64(CHUNK_K3, G3, G1); - __ xmulx(G5, G3, G5); - - __ movdtox(F14, G4); - __ xor3(O4, O5, O5); - __ xor3(G5, O5, O5); - __ xor3(G4, O5, O5); - - // reverse the byte order to big endian, via stack, and move to FP side - __ add(SP, -8, G1); - __ srlx(G1, 3, G1); - __ sllx(G1, 3, G1); - __ stx(O5, G1, G0); - __ ldfl(FloatRegisterImpl::D, G1, G0, F2); // load in little endian - - __ 
crc32c(F6, F2, F0); - - __ set(CHUNK_LEN*8*4, G4); - __ sub(len, G4, len); - __ cmp_and_br_short(len, G4, Assembler::greaterEqual, Assembler::pt, L_crc32c_parallel); - __ nop(); - __ cmp_and_br_short(len, 0, Assembler::equal, Assembler::pt, L_crc32c_done); - - __ BIND(L_crc32c_serial); - - __ mov(32, G4); - __ cmp_and_br_short(len, G4, Assembler::less, Assembler::pn, L_crc32c_x8); - - // ------ process 32B chunks ------ - __ BIND(L_crc32c_x32_loop); - __ ldf(FloatRegisterImpl::D, buf, 0, F2); - __ inc(buf, 8); - __ crc32c(F0, F2, F0); - __ ldf(FloatRegisterImpl::D, buf, 0, F2); - __ inc(buf, 8); - __ crc32c(F0, F2, F0); - __ ldf(FloatRegisterImpl::D, buf, 0, F2); - __ inc(buf, 8); - __ crc32c(F0, F2, F0); - __ ldf(FloatRegisterImpl::D, buf, 0, F2); - __ inc(buf, 8); - __ crc32c(F0, F2, F0); - __ dec(len, 32); - __ cmp_and_br_short(len, G4, Assembler::greaterEqual, Assembler::pt, L_crc32c_x32_loop); - - __ BIND(L_crc32c_x8); - __ nop(); - __ cmp_and_br_short(len, 8, Assembler::less, Assembler::pt, L_crc32c_done); - - // ------ process 8B chunks ------ - __ BIND(L_crc32c_x8_loop); - __ ldf(FloatRegisterImpl::D, buf, 0, F2); - __ inc(buf, 8); - __ crc32c(F0, F2, F0); - __ dec(len, 8); - __ cmp_and_br_short(len, 8, Assembler::greaterEqual, Assembler::pt, L_crc32c_x8_loop); - - __ BIND(L_crc32c_done); - - // move to INT side, and reverse the byte order of lower 32 bits to little endian - __ movftoi_revbytes(F0, crc, G1, G3); - - __ cmp_and_br_short(len, 0, Assembler::equal, Assembler::pt, L_crc32c_return); - - // ------ process the misaligned tail (7 bytes or less) ------ - __ BIND(L_crc32c_tail); - - // crc = (crc >>> 8) ^ byteTable[(crc ^ b) & 0xFF]; - __ ldub(buf, 0, G1); - __ update_byte_crc32(crc, G1, table); - - __ inc(buf); - __ dec(len); - __ cmp_and_br_short(len, 0, Assembler::greater, Assembler::pt, L_crc32c_tail); - - __ BIND(L_crc32c_return); - __ nop(); + __ kernel_crc32c(crc, buf, len, table); + __ retl(); __ delayed()->nop(); @@ -5366,6 +5198,12 @@ 
StubRoutines::_crc_table_adr = (address)StubRoutines::Sparc::_crc_table; StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(); } + + if (UseCRC32CIntrinsics) { + // set the table address before generating the stub that uses it + StubRoutines::_crc32c_table_addr = (address)StubRoutines::Sparc::_crc32c_table; + StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(); + } } @@ -5425,12 +5263,6 @@ StubRoutines::_sha512_implCompress = generate_sha512_implCompress(false, "sha512_implCompress"); StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true, "sha512_implCompressMB"); } - - // generate CRC32C intrinsic code - if (UseCRC32CIntrinsics) { - StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(); - } - // generate Adler32 intrinsics code if (UseAdler32Intrinsics) { StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32(); diff -r 510f77046e00 -r d972e3a2df53 hotspot/src/cpu/sparc/vm/stubRoutines_sparc.cpp --- a/hotspot/src/cpu/sparc/vm/stubRoutines_sparc.cpp Fri Apr 29 17:24:16 2016 +0200 +++ b/hotspot/src/cpu/sparc/vm/stubRoutines_sparc.cpp Wed May 04 15:30:21 2016 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -147,3 +147,62 @@ 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, 0x2d02ef8dUL }; + +/** + * CRC32C constants lookup table + */ +juint StubRoutines::Sparc::_crc32c_table[] = +{ + 0x00000000UL, 0xF26B8303UL, 0xE13B70F7UL, 0x1350F3F4UL, 0xC79A971FUL, + 0x35F1141CUL, 0x26A1E7E8UL, 0xD4CA64EBUL, 0x8AD958CFUL, 0x78B2DBCCUL, + 0x6BE22838UL, 0x9989AB3BUL, 0x4D43CFD0UL, 0xBF284CD3UL, 0xAC78BF27UL, + 0x5E133C24UL, 0x105EC76FUL, 0xE235446CUL, 0xF165B798UL, 0x030E349BUL, + 0xD7C45070UL, 0x25AFD373UL, 0x36FF2087UL, 0xC494A384UL, 0x9A879FA0UL, + 0x68EC1CA3UL, 0x7BBCEF57UL, 0x89D76C54UL, 0x5D1D08BFUL, 0xAF768BBCUL, + 0xBC267848UL, 0x4E4DFB4BUL, 0x20BD8EDEUL, 0xD2D60DDDUL, 0xC186FE29UL, + 0x33ED7D2AUL, 0xE72719C1UL, 0x154C9AC2UL, 0x061C6936UL, 0xF477EA35UL, + 0xAA64D611UL, 0x580F5512UL, 0x4B5FA6E6UL, 0xB93425E5UL, 0x6DFE410EUL, + 0x9F95C20DUL, 0x8CC531F9UL, 0x7EAEB2FAUL, 0x30E349B1UL, 0xC288CAB2UL, + 0xD1D83946UL, 0x23B3BA45UL, 0xF779DEAEUL, 0x05125DADUL, 0x1642AE59UL, + 0xE4292D5AUL, 0xBA3A117EUL, 0x4851927DUL, 0x5B016189UL, 0xA96AE28AUL, + 0x7DA08661UL, 0x8FCB0562UL, 0x9C9BF696UL, 0x6EF07595UL, 0x417B1DBCUL, + 0xB3109EBFUL, 0xA0406D4BUL, 0x522BEE48UL, 0x86E18AA3UL, 0x748A09A0UL, + 0x67DAFA54UL, 0x95B17957UL, 0xCBA24573UL, 0x39C9C670UL, 0x2A993584UL, + 0xD8F2B687UL, 0x0C38D26CUL, 0xFE53516FUL, 0xED03A29BUL, 0x1F682198UL, + 0x5125DAD3UL, 0xA34E59D0UL, 0xB01EAA24UL, 0x42752927UL, 0x96BF4DCCUL, + 0x64D4CECFUL, 0x77843D3BUL, 0x85EFBE38UL, 0xDBFC821CUL, 0x2997011FUL, + 0x3AC7F2EBUL, 0xC8AC71E8UL, 0x1C661503UL, 0xEE0D9600UL, 0xFD5D65F4UL, + 0x0F36E6F7UL, 0x61C69362UL, 0x93AD1061UL, 0x80FDE395UL, 0x72966096UL, + 0xA65C047DUL, 0x5437877EUL, 0x4767748AUL, 0xB50CF789UL, 0xEB1FCBADUL, + 0x197448AEUL, 0x0A24BB5AUL, 0xF84F3859UL, 0x2C855CB2UL, 0xDEEEDFB1UL, + 0xCDBE2C45UL, 0x3FD5AF46UL, 0x7198540DUL, 0x83F3D70EUL, 0x90A324FAUL, + 0x62C8A7F9UL, 0xB602C312UL, 0x44694011UL, 0x5739B3E5UL, 
0xA55230E6UL, + 0xFB410CC2UL, 0x092A8FC1UL, 0x1A7A7C35UL, 0xE811FF36UL, 0x3CDB9BDDUL, + 0xCEB018DEUL, 0xDDE0EB2AUL, 0x2F8B6829UL, 0x82F63B78UL, 0x709DB87BUL, + 0x63CD4B8FUL, 0x91A6C88CUL, 0x456CAC67UL, 0xB7072F64UL, 0xA457DC90UL, + 0x563C5F93UL, 0x082F63B7UL, 0xFA44E0B4UL, 0xE9141340UL, 0x1B7F9043UL, + 0xCFB5F4A8UL, 0x3DDE77ABUL, 0x2E8E845FUL, 0xDCE5075CUL, 0x92A8FC17UL, + 0x60C37F14UL, 0x73938CE0UL, 0x81F80FE3UL, 0x55326B08UL, 0xA759E80BUL, + 0xB4091BFFUL, 0x466298FCUL, 0x1871A4D8UL, 0xEA1A27DBUL, 0xF94AD42FUL, + 0x0B21572CUL, 0xDFEB33C7UL, 0x2D80B0C4UL, 0x3ED04330UL, 0xCCBBC033UL, + 0xA24BB5A6UL, 0x502036A5UL, 0x4370C551UL, 0xB11B4652UL, 0x65D122B9UL, + 0x97BAA1BAUL, 0x84EA524EUL, 0x7681D14DUL, 0x2892ED69UL, 0xDAF96E6AUL, + 0xC9A99D9EUL, 0x3BC21E9DUL, 0xEF087A76UL, 0x1D63F975UL, 0x0E330A81UL, + 0xFC588982UL, 0xB21572C9UL, 0x407EF1CAUL, 0x532E023EUL, 0xA145813DUL, + 0x758FE5D6UL, 0x87E466D5UL, 0x94B49521UL, 0x66DF1622UL, 0x38CC2A06UL, + 0xCAA7A905UL, 0xD9F75AF1UL, 0x2B9CD9F2UL, 0xFF56BD19UL, 0x0D3D3E1AUL, + 0x1E6DCDEEUL, 0xEC064EEDUL, 0xC38D26C4UL, 0x31E6A5C7UL, 0x22B65633UL, + 0xD0DDD530UL, 0x0417B1DBUL, 0xF67C32D8UL, 0xE52CC12CUL, 0x1747422FUL, + 0x49547E0BUL, 0xBB3FFD08UL, 0xA86F0EFCUL, 0x5A048DFFUL, 0x8ECEE914UL, + 0x7CA56A17UL, 0x6FF599E3UL, 0x9D9E1AE0UL, 0xD3D3E1ABUL, 0x21B862A8UL, + 0x32E8915CUL, 0xC083125FUL, 0x144976B4UL, 0xE622F5B7UL, 0xF5720643UL, + 0x07198540UL, 0x590AB964UL, 0xAB613A67UL, 0xB831C993UL, 0x4A5A4A90UL, + 0x9E902E7BUL, 0x6CFBAD78UL, 0x7FAB5E8CUL, 0x8DC0DD8FUL, 0xE330A81AUL, + 0x115B2B19UL, 0x020BD8EDUL, 0xF0605BEEUL, 0x24AA3F05UL, 0xD6C1BC06UL, + 0xC5914FF2UL, 0x37FACCF1UL, 0x69E9F0D5UL, 0x9B8273D6UL, 0x88D28022UL, + 0x7AB90321UL, 0xAE7367CAUL, 0x5C18E4C9UL, 0x4F48173DUL, 0xBD23943EUL, + 0xF36E6F75UL, 0x0105EC76UL, 0x12551F82UL, 0xE03E9C81UL, 0x34F4F86AUL, + 0xC69F7B69UL, 0xD5CF889DUL, 0x27A40B9EUL, 0x79B737BAUL, 0x8BDCB4B9UL, + 0x988C474DUL, 0x6AE7C44EUL, 0xBE2DA0A5UL, 0x4C4623A6UL, 0x5F16D052UL, + 0xAD7D5351UL +}; diff -r 510f77046e00 
-r d972e3a2df53 hotspot/src/cpu/sparc/vm/stubRoutines_sparc.hpp --- a/hotspot/src/cpu/sparc/vm/stubRoutines_sparc.hpp Fri Apr 29 17:24:16 2016 +0200 +++ b/hotspot/src/cpu/sparc/vm/stubRoutines_sparc.hpp Wed May 04 15:30:21 2016 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -56,6 +56,7 @@ // masks and table for CRC32 static uint64_t _crc_by128_masks[]; static juint _crc_table[]; + static juint _crc32c_table[]; public: // test assembler stop routine by setting registers diff -r 510f77046e00 -r d972e3a2df53 hotspot/src/cpu/sparc/vm/templateInterpreterGenerator_sparc.cpp --- a/hotspot/src/cpu/sparc/vm/templateInterpreterGenerator_sparc.cpp Fri Apr 29 17:24:16 2016 +0200 +++ b/hotspot/src/cpu/sparc/vm/templateInterpreterGenerator_sparc.cpp Wed May 04 15:30:21 2016 -0700 @@ -1082,8 +1082,56 @@ return NULL; } -// Not supported +/** + * Method entry for intrinsic-candidate (non-native) methods: + * int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) + * int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long buf, int off, int end) + * Unlike CRC32, CRC32C does not have any methods marked as native + * CRC32C also uses an "end" variable instead of the length variable CRC32 uses + */ address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { + + if (UseCRC32CIntrinsics) { + address entry = __ pc(); + + // Load parameters from the stack + const Register crc = O0; // initial crc + const Register buf = O1; // source java byte array address + const Register offset = O2; // offset + const Register end = O3; // end index (exclusive); len = end - offset + const Register len = O2; // len argument to the kernel + const Register table = 
O3; // crc32c lookup table address + + // Arguments are reversed on java expression stack + // Calculate address of start element + if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) { + __ lduw(Gargs, 0, end); + __ lduw(Gargs, 8, offset); + __ ldx( Gargs, 16, buf); + __ lduw(Gargs, 32, crc); + __ add(buf, offset, buf); + __ sub(end, offset, len); + } else { + __ lduw(Gargs, 0, end); + __ lduw(Gargs, 8, offset); + __ ldx( Gargs, 16, buf); + __ lduw(Gargs, 24, crc); + __ add(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE), buf); // account for the header size + __ add(buf, offset, buf); + __ sub(end, offset, len); + } + + // Call the crc32c kernel + __ MacroAssembler::save_thread(L7_thread_cache); + __ kernel_crc32c(crc, buf, len, table); + __ MacroAssembler::restore_thread(L7_thread_cache); + + // result in O0 + __ retl(); + __ delayed()->nop(); + + return entry; + } return NULL; } diff -r 510f77046e00 -r d972e3a2df53 hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp --- a/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp Fri Apr 29 17:24:16 2016 +0200 +++ b/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp Wed May 04 15:30:21 2016 -0700 @@ -1108,6 +1108,10 @@ } } +void LIRGenerator::do_update_CRC32C(Intrinsic* x) { + Unimplemented(); +} + // _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f // _i2b, _i2c, _i2s LIR_Opr fixed_register_for(BasicType type) { diff -r 510f77046e00 -r d972e3a2df53 hotspot/src/share/vm/c1/c1_Compiler.cpp --- a/hotspot/src/share/vm/c1/c1_Compiler.cpp Fri Apr 29 17:24:16 2016 +0200 +++ b/hotspot/src/share/vm/c1/c1_Compiler.cpp Wed May 04 15:30:21 2016 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -224,6 +224,10 @@ case vmIntrinsics::_updateCRC32: case vmIntrinsics::_updateBytesCRC32: case vmIntrinsics::_updateByteBufferCRC32: +#ifdef SPARC + case vmIntrinsics::_updateBytesCRC32C: + case vmIntrinsics::_updateDirectByteBufferCRC32C: +#endif case vmIntrinsics::_compareAndSwapInt: case vmIntrinsics::_compareAndSwapObject: case vmIntrinsics::_getCharStringU: diff -r 510f77046e00 -r d972e3a2df53 hotspot/src/share/vm/c1/c1_LIRGenerator.cpp --- a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp Fri Apr 29 17:24:16 2016 +0200 +++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp Wed May 04 15:30:21 2016 -0700 @@ -3174,6 +3174,11 @@ do_update_CRC32(x); break; + case vmIntrinsics::_updateBytesCRC32C: + case vmIntrinsics::_updateDirectByteBufferCRC32C: + do_update_CRC32C(x); + break; + default: ShouldNotReachHere(); break; } } diff -r 510f77046e00 -r d972e3a2df53 hotspot/src/share/vm/c1/c1_LIRGenerator.hpp --- a/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp Fri Apr 29 17:24:16 2016 +0200 +++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp Wed May 04 15:30:21 2016 -0700 @@ -253,6 +253,7 @@ void do_FPIntrinsics(Intrinsic* x); void do_Reference_get(Intrinsic* x); void do_update_CRC32(Intrinsic* x); + void do_update_CRC32C(Intrinsic* x); LIR_Opr call_runtime(BasicTypeArray* signature, LIRItemList* args, address entry, ValueType* result_type, CodeEmitInfo* info); LIR_Opr call_runtime(BasicTypeArray* signature, LIR_OprList* args, address entry, ValueType* result_type, CodeEmitInfo* info); diff -r 510f77046e00 -r d972e3a2df53 hotspot/src/share/vm/c1/c1_Runtime1.cpp --- a/hotspot/src/share/vm/c1/c1_Runtime1.cpp Fri Apr 29 17:24:16 2016 +0200 +++ b/hotspot/src/share/vm/c1/c1_Runtime1.cpp Wed May 04 15:30:21 2016 -0700 @@ -318,6 +318,7 @@ FUNCTION_CASE(entry, TRACE_TIME_METHOD); #endif FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32()); + FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32C()); 
FUNCTION_CASE(entry, StubRoutines::dexp()); FUNCTION_CASE(entry, StubRoutines::dlog()); FUNCTION_CASE(entry, StubRoutines::dlog10());