7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
Summary: add intrinsics using new instruction to interpreter, C1, C2, for suitable x86; add test
Reviewed-by: kvn, twisti
--- a/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -2946,6 +2946,9 @@
}
}
+void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) { // placeholder: the SPARC port emits no code for this LIR op
+ fatal("CRC32 intrinsic is not implemented on this platform"); // reaching this point is reported as a fatal error
+}
void LIR_Assembler::emit_lock(LIR_OpLock* op) {
Register obj = op->obj_opr()->as_register();
--- a/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -784,6 +784,10 @@
set_no_result(x);
}
+void LIRGenerator::do_update_CRC32(Intrinsic* x) { // placeholder: the SPARC port generates no LIR for the CRC32 intrinsics
+ fatal("CRC32 intrinsic is not implemented on this platform"); // reaching this point is reported as a fatal error
+}
+
// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
// _i2b, _i2c, _i2s
void LIRGenerator::do_Convert(Convert* x) {
--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -1673,6 +1673,11 @@
emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
}
+void Assembler::movdqa(XMMRegister dst, Address src) { // Move Aligned Double Quadword, memory-to-register form
+ NOT_LP64(assert(VM_Version::supports_sse2(), "")); // SSE2 check is wrapped in NOT_LP64, like the sibling movdqu
+ emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66); // opcode 0x6F with the 66 prefix; movdqu uses the same opcode with F3
+}
+
void Assembler::movdqu(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
@@ -2286,6 +2291,38 @@
emit_int8(imm8);
}
+void Assembler::pextrd(Register dst, XMMRegister src, int imm8) { // SSE4.1 PEXTRD: dst = 32-bit lane of src selected by imm8
+ assert(VM_Version::supports_sse4_1(), "");
+ int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, false); // PEXTRD encodes the XMM source in ModRM.reg and the GPR destination in ModRM.r/m, so src goes in the helper's first slot
+ emit_int8(0x16); // opcode byte
+ emit_int8((unsigned char)(0xC0 | encode)); // ModRM, register-direct form
+ emit_int8(imm8); // lane selector immediate
+}
+
+void Assembler::pextrq(Register dst, XMMRegister src, int imm8) { // SSE4.1 PEXTRQ: dst = 64-bit lane of src selected by imm8
+ assert(VM_Version::supports_sse4_1(), "");
+ int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, true); // PEXTRQ encodes the XMM source in ModRM.reg and the GPR destination in ModRM.r/m, so src goes in the helper's first slot
+ emit_int8(0x16); // opcode byte (same as pextrd; only the final helper argument differs)
+ emit_int8((unsigned char)(0xC0 | encode)); // ModRM, register-direct form
+ emit_int8(imm8); // lane selector immediate
+}
+
+void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) { // SSE4.1 insert: put the 32-bit src into the dst lane selected by imm8
+ assert(VM_Version::supports_sse4_1(), "");
+ int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, false); // dst doubles as nds; the GPR is handed over through its encoding number
+ emit_int8(0x22); // opcode byte
+ emit_int8((unsigned char)(0xC0 | encode)); // ModRM, register-direct form
+ emit_int8(imm8); // lane selector immediate
+}
+
+void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) { // SSE4.1 insert: put the 64-bit src into the dst lane selected by imm8
+ assert(VM_Version::supports_sse4_1(), "");
+ int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, true); // identical to pinsrd except for the final argument (presumably the 64-bit operand-size flag -- confirm against the helper)
+ emit_int8(0x22); // opcode byte
+ emit_int8((unsigned char)(0xC0 | encode)); // ModRM, register-direct form
+ emit_int8(imm8); // lane selector immediate
+}
+
void Assembler::pmovzxbw(XMMRegister dst, Address src) {
assert(VM_Version::supports_sse4_1(), "");
InstructionMark im(this);
@@ -3691,6 +3728,16 @@
emit_int8((unsigned char)(0xC0 | encode));
}
+// Carry-Less Multiplication Quadword (VEX-encoded); mask is emitted as the trailing imm8 operand
+void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
+ assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
+ bool vector256 = false; // always encoded as a 128-bit operation
+ int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
+ emit_int8(0x44); // opcode byte
+ emit_int8((unsigned char)(0xC0 | encode)); // ModRM, register-direct form
+ emit_int8((unsigned char)mask); // imm8; callers in this change pass 0x00, 0x11 and 0x1
+}
+
void Assembler::vzeroupper() {
assert(VM_Version::supports_avx(), "");
(void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
--- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp Tue Jul 02 20:42:12 2013 -0400
@@ -1266,6 +1266,7 @@
// Move Aligned Double Quadword
void movdqa(XMMRegister dst, XMMRegister src);
+ void movdqa(XMMRegister dst, Address src);
// Move Unaligned Double Quadword
void movdqu(Address dst, XMMRegister src);
@@ -1404,6 +1405,14 @@
void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
void pcmpestri(XMMRegister xmm1, Address src, int imm8);
+ // SSE 4.1 extract
+ void pextrd(Register dst, XMMRegister src, int imm8);
+ void pextrq(Register dst, XMMRegister src, int imm8);
+
+ // SSE 4.1 insert
+ void pinsrd(XMMRegister dst, Register src, int imm8);
+ void pinsrq(XMMRegister dst, Register src, int imm8);
+
// SSE4.1 packed move
void pmovzxbw(XMMRegister dst, XMMRegister src);
void pmovzxbw(XMMRegister dst, Address src);
@@ -1764,6 +1773,9 @@
// duplicate 4-bytes integer data from src into 8 locations in dest
void vpbroadcastd(XMMRegister dst, XMMRegister src);
+ // Carry-Less Multiplication Quadword
+ void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
+
// AVX instruction which is used to clear upper 128 bits of YMM registers and
// to avoid transaction penalty between AVX and SSE states. There is no
// penalty if legacy SSE instructions are encoded using VEX prefix because
--- a/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -3512,6 +3512,22 @@
__ bind(*stub->continuation());
}
+void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) { // emits the single-byte CRC32 update: result = ~step(~crc, val)
+ assert(op->crc()->is_single_cpu(), "crc must be register");
+ assert(op->val()->is_single_cpu(), "byte value must be register");
+ assert(op->result_opr()->is_single_cpu(), "result must be register");
+ Register crc = op->crc()->as_register();
+ Register val = op->val()->as_register();
+ Register res = op->result_opr()->as_register();
+
+ assert_different_registers(val, crc, res); // res is used as a scratch register below, so it must not alias the inputs
+
+ __ lea(res, ExternalAddress(StubRoutines::crc_table_addr())); // res temporarily holds the CRC table address
+ __ notl(crc); // ~crc
+ __ update_byte_crc32(crc, val, res); // folds val into crc via the table; val is clobbered
+ __ notl(crc); // ~crc
+ __ mov(res, crc); // table address no longer needed: move the final CRC into the result register
+}
void LIR_Assembler::emit_lock(LIR_OpLock* op) {
Register obj = op->obj_opr()->as_register(); // may not be an oop
--- a/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -932,6 +932,81 @@
__ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(), length.result(), tmp, expected_type, flags, info); // does add_safepoint
}
+void LIRGenerator::do_update_CRC32(Intrinsic* x) {
+ assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions support");
+ // Generates LIR for the three java.util.zip.CRC32 intrinsics; the CRC value ends up in the result operand of x.
+ LIR_Opr result = rlock_result(x);
+ // update(crc, b) becomes an inline LIR op; updateBytes / updateByteBuffer become a leaf call to the stub.
+ switch (x->id()) {
+ case vmIntrinsics::_updateCRC32: {
+ LIRItem crc(x->argument_at(0), this);
+ LIRItem val(x->argument_at(1), this);
+ crc.load_item();
+ val.load_item();
+ __ update_crc32(crc.result(), val.result(), result);
+ break;
+ }
+ case vmIntrinsics::_updateBytesCRC32:
+ case vmIntrinsics::_updateByteBufferCRC32: {
+ bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32);
+
+ LIRItem crc(x->argument_at(0), this);
+ LIRItem buf(x->argument_at(1), this);
+ LIRItem off(x->argument_at(2), this);
+ LIRItem len(x->argument_at(3), this);
+ buf.load_item();
+ off.load_nonconstant();
+
+ LIR_Opr index = off.result();
+ int offset = is_updateBytes ? arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0; // byte[] data starts after the array header; a raw buffer address has no header
+ if (off.result()->is_constant()) { // fold a constant offset into the displacement instead of using an index register
+ index = LIR_OprFact::illegalOpr;
+ offset += off.result()->as_jint();
+ }
+ LIR_Opr base_op = buf.result();
+
+#ifndef _LP64
+ if (!is_updateBytes) { // buf is a raw address passed as a long: narrow it to 32 bits
+ base_op = new_register(T_INT);
+ __ convert(Bytecodes::_l2i, buf.result(), base_op);
+ }
+#else
+ if (index->is_valid()) { // widen the int offset to 64 bits before it is used as an index
+ LIR_Opr tmp = new_register(T_LONG);
+ __ convert(Bytecodes::_i2l, index, tmp);
+ index = tmp;
+ }
+#endif
+
+ LIR_Address* a = new LIR_Address(base_op,
+ index,
+ LIR_Address::times_1,
+ offset,
+ T_BYTE);
+ BasicTypeList signature(3); // stub signature: (int crc, address buf, int len)
+ signature.append(T_INT);
+ signature.append(T_ADDRESS);
+ signature.append(T_INT);
+ CallingConvention* cc = frame_map()->c_calling_convention(&signature);
+ const LIR_Opr result_reg = result_register_for(x->type());
+
+ LIR_Opr addr = new_pointer_register();
+ __ leal(LIR_OprFact::address(a), addr); // compute the address of the first byte to process
+
+ crc.load_item_force(cc->at(0));
+ __ move(addr, cc->at(1));
+ len.load_item_force(cc->at(2));
+
+ __ call_runtime_leaf(StubRoutines::updateBytesCRC32(), getThreadTemp(), result_reg, cc->args());
+ __ move(result_reg, result);
+
+ break;
+ }
+ default: {
+ ShouldNotReachHere();
+ }
+ }
+}
// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
// _i2b, _i2c, _i2s
--- a/hotspot/src/cpu/x86/vm/globals_x86.hpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/cpu/x86/vm/globals_x86.hpp Tue Jul 02 20:42:12 2013 -0400
@@ -96,6 +96,9 @@
product(intx, UseAVX, 99, \
"Highest supported AVX instructions set on x86/x64") \
\
+ product(bool, UseCLMUL, false, \
+ "Control whether CLMUL instructions can be used on x86/x64") \
+ \
diagnostic(bool, UseIncDec, true, \
"Use INC, DEC instructions on x86") \
\
--- a/hotspot/src/cpu/x86/vm/interpreterGenerator_x86.hpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/cpu/x86/vm/interpreterGenerator_x86.hpp Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -39,6 +39,8 @@
address generate_empty_entry(void);
address generate_accessor_entry(void);
address generate_Reference_get_entry();
+ address generate_CRC32_update_entry();
+ address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind);
void lock_method(void);
void generate_stack_overflow_check(void);
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -2794,6 +2794,15 @@
}
}
+void MacroAssembler::movdqa(XMMRegister dst, AddressLiteral src) { // aligned 128-bit load from a literal address
+ if (reachable(src)) {
+ Assembler::movdqa(dst, as_Address(src)); // the literal can be used as a memory operand directly
+ } else {
+ lea(rscratch1, src); // otherwise materialize the address first; this clobbers rscratch1
+ Assembler::movdqa(dst, Address(rscratch1, 0));
+ }
+}
+
void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
if (reachable(src)) {
Assembler::movsd(dst, as_Address(src));
@@ -6388,6 +6397,193 @@
bind(L_done);
}
+/**
+ * Emits code to update CRC-32 with a byte value according to constants in table
+ *
+ * @param [in,out]crc Register containing the crc.
+ * @param [in]val Register containing the byte to fold into the CRC; clobbered (left holding the table index).
+ * @param [in]table Register containing the address of the table of 32-bit crc constants.
+ *
+ * uint32_t crc;
+ * val = (val ^ crc) & 0xFF;
+ * crc = crc_table[val] ^ (crc >> 8);
+ *
+ */
+void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) {
+ xorl(val, crc);
+ andl(val, 0xFF); // keep only the low 8 bits as the table index
+ shrl(crc, 8); // unsigned shift
+ xorl(crc, Address(table, val, Address::times_4, 0)); // table entries are 4 bytes wide
+}
+
+/**
+ * Fold 128-bit data chunk: xcrc = clmul(xK.hi, xcrc.hi) ^ clmul(xK.lo, xcrc.lo) ^ 16 bytes at [buf + offset]; xtmp is clobbered
+ */
+void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset) {
+ vpclmulhdq(xtmp, xK, xcrc); // [127:64]
+ vpclmulldq(xcrc, xK, xcrc); // [63:0]
+ vpxor(xcrc, xcrc, Address(buf, offset), false /* vector256 */); // mix in the next 16 bytes of input
+ pxor(xcrc, xtmp); // combine the two partial products
+}
+
+void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf) { // register form: the 128-bit chunk comes from xbuf instead of memory
+ vpclmulhdq(xtmp, xK, xcrc); // product of the upper quadwords, kept in xtmp
+ vpclmulldq(xcrc, xK, xcrc); // product of the lower quadwords, overwrites xcrc
+ pxor(xcrc, xbuf); // mix in the chunk held in xbuf
+ pxor(xcrc, xtmp); // combine the two partial products
+}
+
+/**
+ * 8-bit folds to compute 32-bit CRC (XMM form; xtmp and tmp are clobbered)
+ *
+ * uint64_t xcrc;
+ * timesXtoThe32[xcrc & 0xFF] ^ (xcrc >> 8);
+ */
+void MacroAssembler::fold_8bit_crc32(XMMRegister xcrc, Register table, XMMRegister xtmp, Register tmp) {
+ movdl(tmp, xcrc); // low 32 bits of xcrc
+ andl(tmp, 0xFF); // lowest byte is the table index
+ movdl(xtmp, Address(table, tmp, Address::times_4, 0)); // load the 4-byte table entry
+ psrldq(xcrc, 1); // unsigned shift one byte
+ pxor(xcrc, xtmp);
+}
+
+/**
+ * General-purpose register form of the 8-bit fold; crc is updated in place and tmp is clobbered.
+ * crc = timesXtoThe32[crc & 0xFF] ^ (crc >> 8);
+ */
+void MacroAssembler::fold_8bit_crc32(Register crc, Register table, Register tmp) {
+ movl(tmp, crc);
+ andl(tmp, 0xFF); // lowest byte is the table index
+ shrl(crc, 8);
+ xorl(crc, Address(table, tmp, Address::times_4, 0)); // table entries are 4 bytes wide
+}
+
+/**
+ * @param crc register containing existing CRC (32-bit); holds the updated CRC on exit
+ * @param buf register pointing to input byte buffer (byte*); advanced as bytes are consumed
+ * @param len register containing number of bytes; clobbered
+ * @param table register that will contain address of CRC table
+ * @param tmp scratch register (rax and xmm0-xmm5 are used as scratch as well)
+ */
+void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Register table, Register tmp) {
+ assert_different_registers(crc, buf, len, table, tmp, rax); // rax carries each loaded byte / table index
+
+ Label L_tail, L_tail_restore, L_tail_loop, L_exit, L_align_loop, L_aligned;
+ Label L_fold_tail, L_fold_128b, L_fold_512b, L_fold_512b_loop, L_fold_tail_loop;
+
+ lea(table, ExternalAddress(StubRoutines::crc_table_addr()));
+ notl(crc); // ~crc
+ cmpl(len, 16);
+ jcc(Assembler::less, L_tail); // fewer than 16 bytes: byte-at-a-time loop only
+
+ // Align buffer to 16 bytes
+ movl(tmp, buf);
+ andl(tmp, 0xF);
+ jccb(Assembler::zero, L_aligned);
+ subl(tmp, 16); // tmp = -(number of bytes up to the next 16-byte boundary)
+ addl(len, tmp); // those bytes are consumed by the alignment loop below
+
+ align(4);
+ BIND(L_align_loop);
+ movsbl(rax, Address(buf, 0)); // load byte with sign extension
+ update_byte_crc32(crc, rax, table); // masks the index to 8 bits, so the sign-extension bits are dropped
+ increment(buf);
+ incrementl(tmp);
+ jccb(Assembler::less, L_align_loop); // loop until tmp counts up to zero
+
+ BIND(L_aligned);
+ movl(tmp, len); // save
+ shrl(len, 4); // len = number of whole 16-byte chunks
+ jcc(Assembler::zero, L_tail_restore);
+
+ // Fold crc into first bytes of vector
+ movdqa(xmm1, Address(buf, 0));
+ movdl(rax, xmm1);
+ xorl(crc, rax);
+ pinsrd(xmm1, crc, 0);
+ addptr(buf, 16);
+ subl(len, 4); // len > 0
+ jcc(Assembler::less, L_fold_tail); // fewer than 4 chunks in total: fold them one at a time
+
+ movdqa(xmm2, Address(buf, 0));
+ movdqa(xmm3, Address(buf, 16));
+ movdqa(xmm4, Address(buf, 32));
+ addptr(buf, 48);
+ subl(len, 3);
+ jcc(Assembler::lessEqual, L_fold_512b); // not enough chunks left for another 4-chunk iteration
+
+ // Fold total 512 bits of polynomial on each iteration,
+ // 128 bits per each of 4 parallel streams.
+ movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 32)); // third 16-byte pair of the masks table
+
+ align(32);
+ BIND(L_fold_512b_loop);
+ fold_128bit_crc32(xmm1, xmm0, xmm5, buf, 0);
+ fold_128bit_crc32(xmm2, xmm0, xmm5, buf, 16);
+ fold_128bit_crc32(xmm3, xmm0, xmm5, buf, 32);
+ fold_128bit_crc32(xmm4, xmm0, xmm5, buf, 48);
+ addptr(buf, 64);
+ subl(len, 4);
+ jcc(Assembler::greater, L_fold_512b_loop);
+
+ // Fold 512 bits to 128 bits.
+ BIND(L_fold_512b);
+ movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 16)); // second 16-byte pair of the masks table
+ fold_128bit_crc32(xmm1, xmm0, xmm5, xmm2);
+ fold_128bit_crc32(xmm1, xmm0, xmm5, xmm3);
+ fold_128bit_crc32(xmm1, xmm0, xmm5, xmm4);
+
+ // Fold the rest of 128 bits data chunks
+ BIND(L_fold_tail);
+ addl(len, 3); // undo the earlier bias: len = remaining whole chunks
+ jccb(Assembler::lessEqual, L_fold_128b);
+ movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 16));
+
+ BIND(L_fold_tail_loop);
+ fold_128bit_crc32(xmm1, xmm0, xmm5, buf, 0);
+ addptr(buf, 16);
+ decrementl(len);
+ jccb(Assembler::greater, L_fold_tail_loop);
+
+ // Fold 128 bits in xmm1 down into 32 bits in crc register.
+ BIND(L_fold_128b);
+ movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr())); // first 16-byte pair of the masks table
+ vpclmulqdq(xmm2, xmm0, xmm1, 0x1);
+ vpand(xmm3, xmm0, xmm2, false /* vector256 */);
+ vpclmulqdq(xmm0, xmm0, xmm3, 0x1);
+ psrldq(xmm1, 8);
+ psrldq(xmm2, 4);
+ pxor(xmm0, xmm1);
+ pxor(xmm0, xmm2);
+
+ // 8 8-bit folds to compute 32-bit CRC.
+ for (int j = 0; j < 4; j++) { // first four folds operate on the XMM register
+ fold_8bit_crc32(xmm0, table, xmm1, rax);
+ }
+ movdl(crc, xmm0); // mov 32 bits to general register
+ for (int j = 0; j < 4; j++) { // remaining four folds operate on the general register
+ fold_8bit_crc32(crc, table, rax);
+ }
+
+ BIND(L_tail_restore);
+ movl(len, tmp); // restore
+ BIND(L_tail);
+ andl(len, 0xf); // bytes left over after the 16-byte chunks
+ jccb(Assembler::zero, L_exit);
+
+ // Fold the rest of bytes
+ align(4);
+ BIND(L_tail_loop);
+ movsbl(rax, Address(buf, 0)); // load byte with sign extension
+ update_byte_crc32(crc, rax, table);
+ increment(buf);
+ decrementl(len);
+ jccb(Assembler::greater, L_tail_loop);
+
+ BIND(L_exit);
+ notl(crc); // ~crc
+}
+
#undef BIND
#undef BLOCK_COMMENT
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -899,6 +899,11 @@
void movdqu(XMMRegister dst, XMMRegister src) { Assembler::movdqu(dst, src); }
void movdqu(XMMRegister dst, AddressLiteral src);
+ // Move Aligned Double Quadword
+ void movdqa(XMMRegister dst, Address src) { Assembler::movdqa(dst, src); }
+ void movdqa(XMMRegister dst, XMMRegister src) { Assembler::movdqa(dst, src); }
+ void movdqa(XMMRegister dst, AddressLiteral src);
+
void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); }
void movsd(Address dst, XMMRegister src) { Assembler::movsd(dst, src); }
void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); }
@@ -1027,6 +1032,16 @@
Assembler::vinsertf128h(dst, nds, src);
}
+ // Carry-Less Multiplication Quadword
+ void vpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+ // 0x00 - multiply lower 64 bits [0:63]
+ Assembler::vpclmulqdq(dst, nds, src, 0x00);
+ }
+ void vpclmulhdq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+ // 0x11 - multiply upper 64 bits [64:127]
+ Assembler::vpclmulqdq(dst, nds, src, 0x11);
+ }
+
// Data
void cmov32( Condition cc, Register dst, Address src);
@@ -1143,6 +1158,16 @@
XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3,
XMMRegister tmp4, Register tmp5, Register result);
+ // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.
+ void update_byte_crc32(Register crc, Register val, Register table);
+ void kernel_crc32(Register crc, Register buf, Register len, Register table, Register tmp);
+ // Fold 128-bit data chunk
+ void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset);
+ void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf);
+ // Fold 8-bit data
+ void fold_8bit_crc32(Register crc, Register table, Register tmp);
+ void fold_8bit_crc32(XMMRegister crc, Register table, XMMRegister xtmp, Register tmp);
+
#undef VIRTUAL
};
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -2713,6 +2713,59 @@
return start;
}
+ /**
+ * Arguments:
+ *
+ * Inputs:
+ * rsp(4) - int crc
+ * rsp(8) - byte* buf
+ * rsp(12) - int length
+ *
+ * Output:
+ * rax - int crc result
+ */
+ address generate_updateBytesCRC32() {
+ assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions");
+
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");
+
+ address start = __ pc();
+
+ const Register crc = rdx; // crc
+ const Register buf = rsi; // source java byte array address
+ const Register len = rcx; // length
+ const Register table = rdi; // crc_table address (reuse register)
+ const Register tmp = rbx;
+ assert_different_registers(crc, buf, len, table, tmp, rax);
+
+ BLOCK_COMMENT("Entry:");
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
+ __ push(rsi); // rsi, rdi and rbx are saved here and popped in reverse order before returning
+ __ push(rdi);
+ __ push(rbx);
+
+ Address crc_arg(rbp, 8 + 0); // the three arguments are read relative to rbp, starting at offset 8
+ Address buf_arg(rbp, 8 + 4);
+ Address len_arg(rbp, 8 + 8);
+
+ // Load up:
+ __ movl(crc, crc_arg);
+ __ movptr(buf, buf_arg);
+ __ movl(len, len_arg);
+
+ __ kernel_crc32(crc, buf, len, table, tmp); // leaves the updated CRC in crc
+
+ __ movl(rax, crc); // return value
+ __ pop(rbx);
+ __ pop(rdi);
+ __ pop(rsi);
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
+ __ ret(0);
+
+ return start;
+ }
+
public:
// Information about frame layout at time of blocking runtime call.
@@ -2887,6 +2940,12 @@
// Build this early so it's available for the interpreter
StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
+
+ if (UseCRC32Intrinsics) {
+ // set table address before stub generation which use it
+ StubRoutines::_crc_table_adr = (address)StubRoutines::x86::_crc_table;
+ StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
+ }
}
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -3584,7 +3584,45 @@
return start;
}
-
+ /**
+ * Arguments:
+ *
+ * Inputs:
+ * c_rarg0 - int crc
+ * c_rarg1 - byte* buf
+ * c_rarg2 - int length
+ *
+ * Output:
+ * rax - int crc result
+ */
+ address generate_updateBytesCRC32() {
+ assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions");
+
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");
+
+ address start = __ pc();
+ // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
+ // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
+ // rscratch1: r10
+ const Register crc = c_rarg0; // crc
+ const Register buf = c_rarg1; // source java byte array address
+ const Register len = c_rarg2; // length
+ const Register table = c_rarg3; // crc_table address (reuse register)
+ const Register tmp = r11;
+ assert_different_registers(crc, buf, len, table, tmp, rax);
+
+ BLOCK_COMMENT("Entry:");
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+ __ kernel_crc32(crc, buf, len, table, tmp); // arguments are already in registers; leaves the updated CRC in crc
+
+ __ movl(rax, crc); // return value
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
+ __ ret(0);
+
+ return start;
+ }
#undef __
#define __ masm->
@@ -3736,6 +3774,11 @@
CAST_FROM_FN_PTR(address,
SharedRuntime::
throw_StackOverflowError));
+ if (UseCRC32Intrinsics) {
+ // set table address before stub generation which use it
+ StubRoutines::_crc_table_adr = (address)StubRoutines::x86::_crc_table;
+ StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
+ }
}
void generate_all() {
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/x86/vm/stubRoutines_x86.cpp Tue Jul 02 20:42:12 2013 -0400
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "runtime/deoptimization.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/thread.inline.hpp"
+
+// Implementation of the platform-specific part of StubRoutines - for
+// a description of how to extend it, see the stubRoutines.hpp file.
+
+address StubRoutines::x86::_verify_mxcsr_entry = NULL;
+address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
+
+uint64_t StubRoutines::x86::_crc_by128_masks[] =
+{
+ /* The fields in this structure are arranged so that they can be
+ * picked up two at a time with 128-bit loads.
+ *
+ * Because of the flipped bit order of this CRC polynomial,
+ * the constant for X**N is left-shifted by 1. This is because
+ * a 64 x 64 polynomial multiply produces a 127-bit result
+ * but the highest term is always aligned to bit 0 in the container.
+ * Pre-shifting by one fixes this, at the cost of potentially making
+ * the 32-bit constant no longer fit in a 32-bit container (thus the
+ * use of uint64_t, though this is also the size used by the carry-
+ * less multiply instruction).
+ *
+ * In addition, the flipped bit order and highest-term-at-least-bit
+ * multiply change the constants used. The 96-bit result will be
+ * aligned to the high-term end of the target 128-bit container,
+ * not the low-term end; that is, instead of a 512-bit or 576-bit fold,
+ * it is a 480 (=512-32) or 544 (=512+64-32) bit fold.
+ *
+ * This causes additional problems in the 128-to-64-bit reduction; see the
+ * code for details. By storing a mask in the otherwise unused half of
+ * a 128-bit constant, bits can be cleared before multiplication without
+ * storing and reloading. Note that staying on a 128-bit datapath means
+ * that some data is uselessly stored and some unused data is intersected
+ * with an irrelevant constant.
+ */
+
+ ((uint64_t) 0xffffffffUL), /* low of K_M_64 (byte offset 0) */
+ ((uint64_t) 0xb1e6b092U << 1), /* high of K_M_64 (byte offset 8) */
+ ((uint64_t) 0xba8ccbe8U << 1), /* low of K_160_96 (byte offset 16) */
+ ((uint64_t) 0x6655004fU << 1), /* high of K_160_96 (byte offset 24) */
+ ((uint64_t) 0xaa2215eaU << 1), /* low of K_544_480 (byte offset 32) */
+ ((uint64_t) 0xe3720acbU << 1) /* high of K_544_480 (byte offset 40) */
+};
+
+/**
+ * crc_table[] from jdk/src/share/native/java/util/zip/zlib-1.2.5/crc32.h
+ */
+juint StubRoutines::x86::_crc_table[] =
+{
+ 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
+ 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
+ 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
+ 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
+ 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
+ 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
+ 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
+ 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
+ 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
+ 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
+ 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
+ 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
+ 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
+ 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
+ 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
+ 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
+ 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
+ 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
+ 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
+ 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
+ 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
+ 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
+ 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
+ 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
+ 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
+ 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
+ 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
+ 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
+ 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
+ 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
+ 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
+ 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
+ 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
+ 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
+ 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
+ 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
+ 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
+ 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
+ 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
+ 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
+ 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
+ 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
+ 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
+ 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
+ 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
+ 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
+ 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
+ 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
+ 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
+ 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
+ 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
+ 0x2d02ef8dUL
+};
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/x86/vm/stubRoutines_x86.hpp Tue Jul 02 20:42:12 2013 -0400
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_X86_VM_STUBROUTINES_X86_HPP
+#define CPU_X86_VM_STUBROUTINES_X86_HPP
+
+// This file holds the platform specific parts of the StubRoutines
+// definition that are common to the 32-bit and 64-bit x86 ports. See
+// stubRoutines.hpp for a description on how to extend it.
+
+ private:
+ static address _verify_mxcsr_entry;
+ // shuffle mask for fixing up 128-bit words consisting of big-endian 32-bit integers
+ static address _key_shuffle_mask_addr;
+ // masks and lookup table used by the CRC32 code
+ static uint64_t _crc_by128_masks[];
+ static juint _crc_table[];
+
+ public:
+ static address verify_mxcsr_entry() { return _verify_mxcsr_entry; }
+ static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; }
+ static address crc_by128_masks_addr() { return (address)_crc_by128_masks; }
+
+#endif // CPU_X86_VM_STUBROUTINES_X86_HPP
--- a/hotspot/src/cpu/x86/vm/stubRoutines_x86_32.cpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/cpu/x86/vm/stubRoutines_x86_32.cpp Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -31,6 +31,4 @@
// Implementation of the platform-specific part of StubRoutines - for
// a description of how to extend it, see the stubRoutines.hpp file.
-address StubRoutines::x86::_verify_mxcsr_entry = NULL;
address StubRoutines::x86::_verify_fpu_cntrl_wrd_entry = NULL;
-address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
--- a/hotspot/src/cpu/x86/vm/stubRoutines_x86_32.hpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/cpu/x86/vm/stubRoutines_x86_32.hpp Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -39,15 +39,12 @@
friend class VMStructs;
private:
- static address _verify_mxcsr_entry;
static address _verify_fpu_cntrl_wrd_entry;
- // shuffle mask for fixing up 128-bit words consisting of big-endian 32-bit integers
- static address _key_shuffle_mask_addr;
public:
- static address verify_mxcsr_entry() { return _verify_mxcsr_entry; }
static address verify_fpu_cntrl_wrd_entry() { return _verify_fpu_cntrl_wrd_entry; }
- static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; }
+
+# include "stubRoutines_x86.hpp"
};
--- a/hotspot/src/cpu/x86/vm/stubRoutines_x86_64.cpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/cpu/x86/vm/stubRoutines_x86_64.cpp Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -34,8 +34,6 @@
address StubRoutines::x86::_get_previous_fp_entry = NULL;
address StubRoutines::x86::_get_previous_sp_entry = NULL;
-address StubRoutines::x86::_verify_mxcsr_entry = NULL;
-
address StubRoutines::x86::_f2i_fixup = NULL;
address StubRoutines::x86::_f2l_fixup = NULL;
address StubRoutines::x86::_d2i_fixup = NULL;
@@ -45,4 +43,3 @@
address StubRoutines::x86::_double_sign_mask = NULL;
address StubRoutines::x86::_double_sign_flip = NULL;
address StubRoutines::x86::_mxcsr_std = NULL;
-address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
--- a/hotspot/src/cpu/x86/vm/stubRoutines_x86_64.hpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/cpu/x86/vm/stubRoutines_x86_64.hpp Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -42,7 +42,6 @@
private:
static address _get_previous_fp_entry;
static address _get_previous_sp_entry;
- static address _verify_mxcsr_entry;
static address _f2i_fixup;
static address _f2l_fixup;
@@ -54,8 +53,6 @@
static address _double_sign_mask;
static address _double_sign_flip;
static address _mxcsr_std;
- // shuffle mask for fixing up 128-bit words consisting of big-endian 32-bit integers
- static address _key_shuffle_mask_addr;
public:
@@ -69,11 +66,6 @@
return _get_previous_sp_entry;
}
- static address verify_mxcsr_entry()
- {
- return _verify_mxcsr_entry;
- }
-
static address f2i_fixup()
{
return _f2i_fixup;
@@ -119,7 +111,7 @@
return _mxcsr_std;
}
- static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; }
+# include "stubRoutines_x86.hpp"
};
--- a/hotspot/src/cpu/x86/vm/templateInterpreter_x86_32.cpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/cpu/x86/vm/templateInterpreter_x86_32.cpp Tue Jul 02 20:42:12 2013 -0400
@@ -868,6 +868,120 @@
return generate_accessor_entry();
}
+/**
+ * Method entry for static native methods:
+ * int java.util.zip.CRC32.update(int crc, int b)
+ */
+address InterpreterGenerator::generate_CRC32_update_entry() {
+ if (UseCRC32Intrinsics) {
+ address entry = __ pc();
+
+ // rbx: Method*
+ // rsi: senderSP must be preserved for slow path, set SP to it on fast path
+ // rdx: scratch
+ // rdi: scratch
+
+ Label slow_path;
+ // If we need a safepoint check, generate full interpreter entry.
+ ExternalAddress state(SafepointSynchronize::address_of_state());
+ __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()),
+ SafepointSynchronize::_not_synchronized);
+ __ jcc(Assembler::notEqual, slow_path);
+
+ // We don't generate local frame and don't align stack because
+ // we call stub code and there is no safepoint on this path.
+
+ // Load parameters
+ const Register crc = rax; // crc
+ const Register val = rdx; // source java byte value
+ const Register tbl = rdi; // scratch
+
+ // Arguments are reversed on java expression stack
+ __ movl(val, Address(rsp, wordSize)); // byte value
+ __ movl(crc, Address(rsp, 2*wordSize)); // Initial CRC
+
+ __ lea(tbl, ExternalAddress(StubRoutines::crc_table_addr()));
+ __ notl(crc); // ~crc
+ __ update_byte_crc32(crc, val, tbl);
+ __ notl(crc); // ~crc
+ // result in rax
+
+ // return to the caller (labelled _areturn although the result is an int)
+ __ pop(rdi); // get return address
+ __ mov(rsp, rsi); // set sp to sender sp
+ __ jmp(rdi);
+
+ // generate a vanilla native entry as the slow path
+ __ bind(slow_path);
+
+ (void) generate_native_entry(false);
+
+ return entry;
+ }
+ return generate_native_entry(false);
+}
+
+/**
+ * Method entry for static native methods:
+ * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len)
+ * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len)
+ */
+address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
+ if (UseCRC32Intrinsics) {
+ address entry = __ pc();
+
+ // rbx: Method*
+ // rsi: senderSP must be preserved for slow path, set SP to it on fast path
+ // rdx: scratch
+ // rdi: scratch
+
+ Label slow_path;
+ // If we need a safepoint check, generate full interpreter entry.
+ ExternalAddress state(SafepointSynchronize::address_of_state());
+ __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()),
+ SafepointSynchronize::_not_synchronized);
+ __ jcc(Assembler::notEqual, slow_path);
+
+ // We don't generate local frame and don't align stack because
+ // we call stub code and there is no safepoint on this path.
+
+ // Load parameters
+ const Register crc = rax; // crc
+ const Register buf = rdx; // source java byte array address
+ const Register len = rdi; // length
+
+ // Arguments are reversed on java expression stack
+ __ movl(len, Address(rsp, wordSize)); // Length
+ // Calculate address of start element
+ if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) {
+ __ movptr(buf, Address(rsp, 3*wordSize)); // long buf
+ __ addptr(buf, Address(rsp, 2*wordSize)); // + offset
+ __ movl(crc, Address(rsp, 5*wordSize)); // Initial CRC (the long buf takes two slots)
+ } else {
+ __ movptr(buf, Address(rsp, 3*wordSize)); // byte[] array
+ __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
+ __ addptr(buf, Address(rsp, 2*wordSize)); // + offset
+ __ movl(crc, Address(rsp, 4*wordSize)); // Initial CRC
+ }
+
+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len);
+ // result in rax
+
+ // return to the caller (labelled _areturn although the result is an int)
+ __ pop(rdi); // get return address
+ __ mov(rsp, rsi); // set sp to sender sp
+ __ jmp(rdi);
+
+ // generate a vanilla native entry as the slow path
+ __ bind(slow_path);
+
+ (void) generate_native_entry(false);
+
+ return entry;
+ }
+ return generate_native_entry(false);
+}
+
//
// Interpreter stub for calling a native method. (asm interpreter)
// This sets up a somewhat different looking stack for calling the native method
@@ -1501,15 +1615,16 @@
// determine code generation flags
bool synchronized = false;
address entry_point = NULL;
+ InterpreterGenerator* ig_this = (InterpreterGenerator*)this;
switch (kind) {
- case Interpreter::zerolocals : break;
- case Interpreter::zerolocals_synchronized: synchronized = true; break;
- case Interpreter::native : entry_point = ((InterpreterGenerator*)this)->generate_native_entry(false); break;
- case Interpreter::native_synchronized : entry_point = ((InterpreterGenerator*)this)->generate_native_entry(true); break;
- case Interpreter::empty : entry_point = ((InterpreterGenerator*)this)->generate_empty_entry(); break;
- case Interpreter::accessor : entry_point = ((InterpreterGenerator*)this)->generate_accessor_entry(); break;
- case Interpreter::abstract : entry_point = ((InterpreterGenerator*)this)->generate_abstract_entry(); break;
+ case Interpreter::zerolocals : break;
+ case Interpreter::zerolocals_synchronized: synchronized = true; break;
+ case Interpreter::native : entry_point = ig_this->generate_native_entry(false); break;
+ case Interpreter::native_synchronized : entry_point = ig_this->generate_native_entry(true); break;
+ case Interpreter::empty : entry_point = ig_this->generate_empty_entry(); break;
+ case Interpreter::accessor : entry_point = ig_this->generate_accessor_entry(); break;
+ case Interpreter::abstract : entry_point = ig_this->generate_abstract_entry(); break;
case Interpreter::java_lang_math_sin : // fall thru
case Interpreter::java_lang_math_cos : // fall thru
@@ -1519,9 +1634,15 @@
case Interpreter::java_lang_math_log10 : // fall thru
case Interpreter::java_lang_math_sqrt : // fall thru
case Interpreter::java_lang_math_pow : // fall thru
- case Interpreter::java_lang_math_exp : entry_point = ((InterpreterGenerator*)this)->generate_math_entry(kind); break;
+ case Interpreter::java_lang_math_exp : entry_point = ig_this->generate_math_entry(kind); break;
case Interpreter::java_lang_ref_reference_get
- : entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break;
+ : entry_point = ig_this->generate_Reference_get_entry(); break;
+ case Interpreter::java_util_zip_CRC32_update
+ : entry_point = ig_this->generate_CRC32_update_entry(); break;
+ case Interpreter::java_util_zip_CRC32_updateBytes
+ : // fall thru
+ case Interpreter::java_util_zip_CRC32_updateByteBuffer
+ : entry_point = ig_this->generate_CRC32_updateBytes_entry(kind); break;
default:
fatal(err_msg("unexpected method kind: %d", kind));
break;
@@ -1529,7 +1650,7 @@
if (entry_point) return entry_point;
- return ((InterpreterGenerator*)this)->generate_normal_entry(synchronized);
+ return ig_this->generate_normal_entry(synchronized);
}
--- a/hotspot/src/cpu/x86/vm/templateInterpreter_x86_64.cpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/cpu/x86/vm/templateInterpreter_x86_64.cpp Tue Jul 02 20:42:12 2013 -0400
@@ -840,6 +840,117 @@
return generate_accessor_entry();
}
+/**
+ * Method entry for static native methods:
+ * int java.util.zip.CRC32.update(int crc, int b)
+ */
+address InterpreterGenerator::generate_CRC32_update_entry() {
+ if (UseCRC32Intrinsics) {
+ address entry = __ pc();
+
+ // rbx: Method*
+ // r13: senderSP must be preserved for slow path, set SP to it on fast path
+ // rdx: scratch
+ // rdi: scratch
+
+ Label slow_path;
+ // If we need a safepoint check, generate full interpreter entry.
+ ExternalAddress state(SafepointSynchronize::address_of_state());
+ __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()),
+ SafepointSynchronize::_not_synchronized);
+ __ jcc(Assembler::notEqual, slow_path);
+
+ // We don't generate local frame and don't align stack because
+ // we call stub code and there is no safepoint on this path.
+
+ // Load parameters
+ const Register crc = rax; // crc
+ const Register val = rdx; // source java byte value
+ const Register tbl = rdi; // scratch
+
+ // Arguments are reversed on java expression stack
+ __ movl(val, Address(rsp, wordSize)); // byte value
+ __ movl(crc, Address(rsp, 2*wordSize)); // Initial CRC
+
+ __ lea(tbl, ExternalAddress(StubRoutines::crc_table_addr()));
+ __ notl(crc); // ~crc
+ __ update_byte_crc32(crc, val, tbl);
+ __ notl(crc); // ~crc
+ // result in rax
+
+ // return to the caller (labelled _areturn although the result is an int)
+ __ pop(rdi); // get return address
+ // The 64-bit interpreter keeps the sender SP in r13 (rsi is the 32-bit
+ // convention), matching generate_CRC32_updateBytes_entry in this file.
+ __ mov(rsp, r13); // set sp to sender sp
+ __ jmp(rdi);
+
+ // generate a vanilla native entry as the slow path
+ __ bind(slow_path);
+
+ (void) generate_native_entry(false);
+
+ return entry;
+ }
+ return generate_native_entry(false);
+}
+
+/**
+ * Method entry for static native methods:
+ * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len)
+ * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len)
+ */
+address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
+ if (UseCRC32Intrinsics) {
+ address entry = __ pc();
+
+ // rbx: Method*
+ // r13: senderSP must be preserved for slow path, set SP to it on fast path
+
+ Label slow_path;
+ // If we need a safepoint check, generate full interpreter entry.
+ ExternalAddress state(SafepointSynchronize::address_of_state());
+ __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()),
+ SafepointSynchronize::_not_synchronized);
+ __ jcc(Assembler::notEqual, slow_path);
+
+ // We don't generate local frame and don't align stack because
+ // we call stub code and there is no safepoint on this path.
+
+ // Load parameters
+ const Register crc = c_rarg0; // crc
+ const Register buf = c_rarg1; // source java byte array address
+ const Register len = c_rarg2; // length
+ const Register off = len; // offset; shares the register of 'len', which is loaded last
+
+ // Arguments are reversed on java expression stack
+ // Calculate address of start element. 'off' is a 32-bit Java int held in
+ // a 64-bit stack slot, so it is sign-extended into a register first
+ // rather than added straight from memory as a 64-bit quantity.
+ if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) {
+ __ movptr(buf, Address(rsp, 3*wordSize)); // long buf
+ __ movl2ptr(off, Address(rsp, 2*wordSize)); // offset
+ __ addptr(buf, off); // + offset
+ __ movl(crc, Address(rsp, 5*wordSize)); // Initial CRC (the long buf takes two slots)
+ } else {
+ __ movptr(buf, Address(rsp, 3*wordSize)); // byte[] array
+ __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
+ __ movl2ptr(off, Address(rsp, 2*wordSize)); // offset
+ __ addptr(buf, off); // + offset
+ __ movl(crc, Address(rsp, 4*wordSize)); // Initial CRC
+ }
+ // 'off' is no longer needed, so its register can now receive the length
+ __ movl(len, Address(rsp, wordSize)); // Length
+
+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len);
+ // result in rax
+
+ // return to the caller (labelled _areturn although the result is an int)
+ __ pop(rdi); // get return address
+ __ mov(rsp, r13); // set sp to sender sp
+ __ jmp(rdi);
+
+ // generate a vanilla native entry as the slow path
+ __ bind(slow_path);
+
+ (void) generate_native_entry(false);
+
+ return entry;
+ }
+ return generate_native_entry(false);
+}
// Interpreter stub for calling a native method. (asm interpreter)
// This sets up a somewhat different looking stack for calling the
@@ -1510,15 +1621,16 @@
// determine code generation flags
bool synchronized = false;
address entry_point = NULL;
+ InterpreterGenerator* ig_this = (InterpreterGenerator*)this;
switch (kind) {
- case Interpreter::zerolocals : break;
- case Interpreter::zerolocals_synchronized: synchronized = true; break;
- case Interpreter::native : entry_point = ((InterpreterGenerator*)this)->generate_native_entry(false); break;
- case Interpreter::native_synchronized : entry_point = ((InterpreterGenerator*)this)->generate_native_entry(true); break;
- case Interpreter::empty : entry_point = ((InterpreterGenerator*)this)->generate_empty_entry(); break;
- case Interpreter::accessor : entry_point = ((InterpreterGenerator*)this)->generate_accessor_entry(); break;
- case Interpreter::abstract : entry_point = ((InterpreterGenerator*)this)->generate_abstract_entry(); break;
+ case Interpreter::zerolocals : break;
+ case Interpreter::zerolocals_synchronized: synchronized = true; break;
+ case Interpreter::native : entry_point = ig_this->generate_native_entry(false); break;
+ case Interpreter::native_synchronized : entry_point = ig_this->generate_native_entry(true); break;
+ case Interpreter::empty : entry_point = ig_this->generate_empty_entry(); break;
+ case Interpreter::accessor : entry_point = ig_this->generate_accessor_entry(); break;
+ case Interpreter::abstract : entry_point = ig_this->generate_abstract_entry(); break;
case Interpreter::java_lang_math_sin : // fall thru
case Interpreter::java_lang_math_cos : // fall thru
@@ -1528,9 +1640,15 @@
case Interpreter::java_lang_math_log10 : // fall thru
case Interpreter::java_lang_math_sqrt : // fall thru
case Interpreter::java_lang_math_pow : // fall thru
- case Interpreter::java_lang_math_exp : entry_point = ((InterpreterGenerator*)this)->generate_math_entry(kind); break;
+ case Interpreter::java_lang_math_exp : entry_point = ig_this->generate_math_entry(kind); break;
case Interpreter::java_lang_ref_reference_get
- : entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break;
+ : entry_point = ig_this->generate_Reference_get_entry(); break;
+ case Interpreter::java_util_zip_CRC32_update
+ : entry_point = ig_this->generate_CRC32_update_entry(); break;
+ case Interpreter::java_util_zip_CRC32_updateBytes
+ : // fall thru
+ case Interpreter::java_util_zip_CRC32_updateByteBuffer
+ : entry_point = ig_this->generate_CRC32_updateBytes_entry(kind); break;
default:
fatal(err_msg("unexpected method kind: %d", kind));
break;
@@ -1540,8 +1658,7 @@
return entry_point;
}
- return ((InterpreterGenerator*) this)->
- generate_normal_entry(synchronized);
+ return ig_this->generate_normal_entry(synchronized);
}
// These should never be compiled since the interpreter will prefer
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -446,6 +446,7 @@
(supports_avx() ? ", avx" : ""),
(supports_avx2() ? ", avx2" : ""),
(supports_aes() ? ", aes" : ""),
+ (supports_clmul() ? ", clmul" : ""),
(supports_erms() ? ", erms" : ""),
(supports_mmx_ext() ? ", mmxext" : ""),
(supports_3dnow_prefetch() ? ", 3dnowpref" : ""),
@@ -489,6 +490,27 @@
FLAG_SET_DEFAULT(UseAES, false);
}
+ // Use CLMUL instructions if available.
+ if (supports_clmul()) {
+ if (FLAG_IS_DEFAULT(UseCLMUL)) {
+ UseCLMUL = true;
+ }
+ } else if (UseCLMUL) {
+ if (!FLAG_IS_DEFAULT(UseCLMUL))
+ warning("CLMUL instructions not available on this CPU (AVX may also be required)");
+ FLAG_SET_DEFAULT(UseCLMUL, false);
+ }
+
+ if (UseCLMUL && (UseAVX > 0) && (UseSSE > 2)) {
+ if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
+ UseCRC32Intrinsics = true;
+ }
+ } else if (UseCRC32Intrinsics) {
+ if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
+ warning("CRC32 Intrinsics requires AVX and CLMUL instructions (not available on this CPU)");
+ FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
+ }
+
// The AES intrinsic stubs require AES instruction support (of course)
// but also require sse3 mode for instructions it use.
if (UseAES && (UseSSE > 2)) {
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.hpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.hpp Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -61,7 +61,8 @@
uint32_t value;
struct {
uint32_t sse3 : 1,
- : 2,
+ clmul : 1,
+ : 1,
monitor : 1,
: 1,
vmx : 1,
@@ -249,7 +250,8 @@
CPU_AVX = (1 << 17),
CPU_AVX2 = (1 << 18),
CPU_AES = (1 << 19),
- CPU_ERMS = (1 << 20) // enhanced 'rep movsb/stosb' instructions
+ CPU_ERMS = (1 << 20), // enhanced 'rep movsb/stosb' instructions
+ CPU_CLMUL = (1 << 21) // carryless multiply for CRC
} cpuFeatureFlags;
enum {
@@ -429,6 +431,8 @@
result |= CPU_AES;
if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0)
result |= CPU_ERMS;
+ if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0)
+ result |= CPU_CLMUL;
// AMD features.
if (is_amd()) {
@@ -555,6 +559,7 @@
static bool supports_tsc() { return (_cpuFeatures & CPU_TSC) != 0; }
static bool supports_aes() { return (_cpuFeatures & CPU_AES) != 0; }
static bool supports_erms() { return (_cpuFeatures & CPU_ERMS) != 0; }
+ static bool supports_clmul() { return (_cpuFeatures & CPU_CLMUL) != 0; }
// Intel features
static bool is_intel_family_core() { return is_intel() &&
--- a/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -3461,6 +3461,14 @@
preserves_state = true;
break;
+ case vmIntrinsics::_updateCRC32:
+ case vmIntrinsics::_updateBytesCRC32:
+ case vmIntrinsics::_updateByteBufferCRC32:
+ if (!UseCRC32Intrinsics) return false;
+ cantrap = false;
+ preserves_state = true;
+ break;
+
case vmIntrinsics::_loadFence :
case vmIntrinsics::_storeFence:
case vmIntrinsics::_fullFence :
--- a/hotspot/src/share/vm/c1/c1_LIR.cpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/share/vm/c1/c1_LIR.cpp Tue Jul 02 20:42:12 2013 -0400
@@ -430,6 +430,11 @@
_stub = new ArrayCopyStub(this);
}
+// Builds a lir_updatecrc32 op: 'res' is passed to LIR_Op as the result
+// operand, NULL is passed as its third argument, and crc/val are stored.
+LIR_OpUpdateCRC32::LIR_OpUpdateCRC32(LIR_Opr crc, LIR_Opr val, LIR_Opr res)
+ : LIR_Op(lir_updatecrc32, res, NULL), _crc(crc), _val(val)
+{
+}
//-------------------verify--------------------------
@@ -876,6 +881,20 @@
}
+// LIR_OpUpdateCRC32
+ case lir_updatecrc32: {
+ assert(op->as_OpUpdateCRC32() != NULL, "must be");
+ LIR_OpUpdateCRC32* opUp = (LIR_OpUpdateCRC32*)op;
+
+ assert(opUp->_crc->is_valid(), "used"); do_input(opUp->_crc); do_temp(opUp->_crc);
+ assert(opUp->_val->is_valid(), "used"); do_input(opUp->_val); do_temp(opUp->_val);
+ assert(opUp->_result->is_valid(), "used"); do_output(opUp->_result);
+ assert(opUp->_info == NULL, "no info for LIR_OpUpdateCRC32");
+
+ break;
+ }
+
+
// LIR_OpLock
case lir_lock:
case lir_unlock: {
@@ -1056,6 +1075,10 @@
masm->emit_code_stub(stub());
}
+// Code emission is delegated to the assembler's emit_updatecrc32().
+void LIR_OpUpdateCRC32::emit_code(LIR_Assembler* masm) {
+ masm->emit_updatecrc32(this);
+}
+
void LIR_Op0::emit_code(LIR_Assembler* masm) {
masm->emit_op0(this);
}
@@ -1763,6 +1786,8 @@
case lir_dynamic_call: s = "dynamic"; break;
// LIR_OpArrayCopy
case lir_arraycopy: s = "arraycopy"; break;
+ // LIR_OpUpdateCRC32
+ case lir_updatecrc32: s = "updatecrc32"; break;
// LIR_OpLock
case lir_lock: s = "lock"; break;
case lir_unlock: s = "unlock"; break;
@@ -1815,6 +1840,13 @@
tmp()->print(out); out->print(" ");
}
+// LIR_OpUpdateCRC32
+// Prints the crc, val and result operands in that order, each followed
+// by a single space.
+void LIR_OpUpdateCRC32::print_instr(outputStream* out) const {
+ crc()->print(out);
+ out->print(" ");
+
+ val()->print(out);
+ out->print(" ");
+
+ result_opr()->print(out);
+ out->print(" ");
+}
+
// LIR_OpCompareAndSwap
void LIR_OpCompareAndSwap::print_instr(outputStream* out) const {
addr()->print(out); out->print(" ");
--- a/hotspot/src/share/vm/c1/c1_LIR.hpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/share/vm/c1/c1_LIR.hpp Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -877,6 +877,7 @@
class LIR_OpJavaCall;
class LIR_OpRTCall;
class LIR_OpArrayCopy;
+class LIR_OpUpdateCRC32;
class LIR_OpLock;
class LIR_OpTypeCheck;
class LIR_OpCompareAndSwap;
@@ -982,6 +983,9 @@
, begin_opArrayCopy
, lir_arraycopy
, end_opArrayCopy
+ , begin_opUpdateCRC32
+ , lir_updatecrc32
+ , end_opUpdateCRC32
, begin_opLock
, lir_lock
, lir_unlock
@@ -1137,6 +1141,7 @@
virtual LIR_Op2* as_Op2() { return NULL; }
virtual LIR_Op3* as_Op3() { return NULL; }
virtual LIR_OpArrayCopy* as_OpArrayCopy() { return NULL; }
+ virtual LIR_OpUpdateCRC32* as_OpUpdateCRC32() { return NULL; }
virtual LIR_OpTypeCheck* as_OpTypeCheck() { return NULL; }
virtual LIR_OpCompareAndSwap* as_OpCompareAndSwap() { return NULL; }
virtual LIR_OpProfileCall* as_OpProfileCall() { return NULL; }
@@ -1293,6 +1298,25 @@
void print_instr(outputStream* out) const PRODUCT_RETURN;
};
+// LIR_OpUpdateCRC32
+// LIR operation carrying the two input operands (crc, val) of a CRC32
+// update; the result operand is handed to the LIR_Op base by the constructor.
+class LIR_OpUpdateCRC32: public LIR_Op {
+ friend class LIR_OpVisitState;
+
+public:
+ LIR_OpUpdateCRC32(LIR_Opr crc, LIR_Opr val, LIR_Opr res);
+
+ // operand accessors
+ LIR_Opr crc() const { return _crc; }
+ LIR_Opr val() const { return _val; }
+
+ virtual void emit_code(LIR_Assembler* masm);
+ virtual LIR_OpUpdateCRC32* as_OpUpdateCRC32() { return this; }
+ void print_instr(outputStream* out) const PRODUCT_RETURN;
+
+private:
+ LIR_Opr _crc; // incoming crc operand
+ LIR_Opr _val; // value operand to fold into the crc
+};
// --------------------------------------------------
// LIR_Op0
@@ -2212,6 +2236,8 @@
void arraycopy(LIR_Opr src, LIR_Opr src_pos, LIR_Opr dst, LIR_Opr dst_pos, LIR_Opr length, LIR_Opr tmp, ciArrayKlass* expected_type, int flags, CodeEmitInfo* info) { append(new LIR_OpArrayCopy(src, src_pos, dst, dst_pos, length, tmp, expected_type, flags, info)); }
+ void update_crc32(LIR_Opr crc, LIR_Opr val, LIR_Opr res) { append(new LIR_OpUpdateCRC32(crc, val, res)); }
+
void fpop_raw() { append(new LIR_Op0(lir_fpop_raw)); }
void instanceof(LIR_Opr result, LIR_Opr object, ciKlass* klass, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, bool fast_check, CodeEmitInfo* info_for_patch, ciMethod* profiled_method, int profiled_bci);
--- a/hotspot/src/share/vm/c1/c1_LIRAssembler.hpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/share/vm/c1/c1_LIRAssembler.hpp Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -195,6 +195,7 @@
void emit_opBranch(LIR_OpBranch* op);
void emit_opLabel(LIR_OpLabel* op);
void emit_arraycopy(LIR_OpArrayCopy* op);
+ void emit_updatecrc32(LIR_OpUpdateCRC32* op);
void emit_opConvert(LIR_OpConvert* op);
void emit_alloc_obj(LIR_OpAllocObj* op);
void emit_alloc_array(LIR_OpAllocArray* op);
--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp Tue Jul 02 20:42:12 2013 -0400
@@ -2994,6 +2994,12 @@
do_Reference_get(x);
break;
+ case vmIntrinsics::_updateCRC32:
+ case vmIntrinsics::_updateBytesCRC32:
+ case vmIntrinsics::_updateByteBufferCRC32:
+ do_update_CRC32(x);
+ break;
+
default: ShouldNotReachHere(); break;
}
}
--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -247,6 +247,7 @@
void do_NIOCheckIndex(Intrinsic* x);
void do_FPIntrinsics(Intrinsic* x);
void do_Reference_get(Intrinsic* x);
+ void do_update_CRC32(Intrinsic* x);
void do_UnsafePrefetch(UnsafePrefetch* x, bool is_store);
--- a/hotspot/src/share/vm/c1/c1_Runtime1.cpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/share/vm/c1/c1_Runtime1.cpp Tue Jul 02 20:42:12 2013 -0400
@@ -299,6 +299,7 @@
#ifdef TRACE_HAVE_INTRINSICS
FUNCTION_CASE(entry, TRACE_TIME_METHOD);
#endif
+ FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32());
#undef FUNCTION_CASE
--- a/hotspot/src/share/vm/classfile/vmSymbols.hpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp Tue Jul 02 20:42:12 2013 -0400
@@ -771,6 +771,17 @@
do_name( decrypt_name, "decrypt") \
do_signature(byteArray_int_int_byteArray_int_signature, "([BII[BI)V") \
\
+ /* support for java.util.zip */ \
+ do_class(java_util_zip_CRC32, "java/util/zip/CRC32") \
+ do_intrinsic(_updateCRC32, java_util_zip_CRC32, update_name, int2_int_signature, F_SN) \
+ do_name( update_name, "update") \
+ do_intrinsic(_updateBytesCRC32, java_util_zip_CRC32, updateBytes_name, updateBytes_signature, F_SN) \
+ do_name( updateBytes_name, "updateBytes") \
+ do_signature(updateBytes_signature, "(I[BII)I") \
+ do_intrinsic(_updateByteBufferCRC32, java_util_zip_CRC32, updateByteBuffer_name, updateByteBuffer_signature, F_SN) \
+ do_name( updateByteBuffer_name, "updateByteBuffer") \
+ do_signature(updateByteBuffer_signature, "(IJII)I") \
+ \
/* support for sun.misc.Unsafe */ \
do_class(sun_misc_Unsafe, "sun/misc/Unsafe") \
\
--- a/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -102,6 +102,9 @@
java_lang_math_pow, // implementation of java.lang.Math.pow (x,y)
java_lang_math_exp, // implementation of java.lang.Math.exp (x)
java_lang_ref_reference_get, // implementation of java.lang.ref.Reference.get()
+ java_util_zip_CRC32_update, // implementation of java.util.zip.CRC32.update()
+ java_util_zip_CRC32_updateBytes, // implementation of java.util.zip.CRC32.updateBytes()
+ java_util_zip_CRC32_updateByteBuffer, // implementation of java.util.zip.CRC32.updateByteBuffer()
number_of_method_entries,
invalid = -1
};
--- a/hotspot/src/share/vm/interpreter/interpreter.cpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/share/vm/interpreter/interpreter.cpp Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -195,6 +195,17 @@
return kind;
}
+#ifndef CC_INTERP
+ if (UseCRC32Intrinsics && m->is_native()) {
+ // Use optimized stub code for CRC32 native methods.
+ switch (m->intrinsic_id()) {
+ case vmIntrinsics::_updateCRC32 : return java_util_zip_CRC32_update;
+ case vmIntrinsics::_updateBytesCRC32 : return java_util_zip_CRC32_updateBytes;
+ case vmIntrinsics::_updateByteBufferCRC32 : return java_util_zip_CRC32_updateByteBuffer;
+ }
+ }
+#endif
+
// Native method?
// Note: This test must come _before_ the test for intrinsic
// methods. See also comments below.
@@ -297,6 +308,9 @@
case java_lang_math_sqrt : tty->print("java_lang_math_sqrt" ); break;
case java_lang_math_log : tty->print("java_lang_math_log" ); break;
case java_lang_math_log10 : tty->print("java_lang_math_log10" ); break;
+ case java_util_zip_CRC32_update : tty->print("java_util_zip_CRC32_update"); break;
+ case java_util_zip_CRC32_updateBytes : tty->print("java_util_zip_CRC32_updateBytes"); break;
+ case java_util_zip_CRC32_updateByteBuffer : tty->print("java_util_zip_CRC32_updateByteBuffer"); break;
default:
if (kind >= method_handle_invoke_FIRST &&
kind <= method_handle_invoke_LAST) {
--- a/hotspot/src/share/vm/interpreter/templateInterpreter.cpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/share/vm/interpreter/templateInterpreter.cpp Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -373,6 +373,12 @@
method_entry(java_lang_math_pow )
method_entry(java_lang_ref_reference_get)
+ if (UseCRC32Intrinsics) {
+ method_entry(java_util_zip_CRC32_update)
+ method_entry(java_util_zip_CRC32_updateBytes)
+ method_entry(java_util_zip_CRC32_updateByteBuffer)
+ }
+
initialize_method_handle_entries();
// all native method kinds (must be one contiguous block)
--- a/hotspot/src/share/vm/opto/escape.cpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/share/vm/opto/escape.cpp Tue Jul 02 20:42:12 2013 -0400
@@ -933,6 +933,7 @@
(call->as_CallLeaf()->_name != NULL &&
(strcmp(call->as_CallLeaf()->_name, "g1_wb_pre") == 0 ||
strcmp(call->as_CallLeaf()->_name, "g1_wb_post") == 0 ||
+ strcmp(call->as_CallLeaf()->_name, "updateBytesCRC32") == 0 ||
strcmp(call->as_CallLeaf()->_name, "aescrypt_encryptBlock") == 0 ||
strcmp(call->as_CallLeaf()->_name, "aescrypt_decryptBlock") == 0 ||
strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_encryptAESCrypt") == 0 ||
--- a/hotspot/src/share/vm/opto/library_call.cpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/share/vm/opto/library_call.cpp Tue Jul 02 20:42:12 2013 -0400
@@ -291,6 +291,9 @@
Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting);
Node* get_key_start_from_aescrypt_object(Node* aescrypt_object);
bool inline_encodeISOArray();
+ bool inline_updateCRC32();
+ bool inline_updateBytesCRC32();
+ bool inline_updateByteBufferCRC32();
};
@@ -488,6 +491,12 @@
is_predicted = true;
break;
+ case vmIntrinsics::_updateCRC32:
+ case vmIntrinsics::_updateBytesCRC32:
+ case vmIntrinsics::_updateByteBufferCRC32:
+ if (!UseCRC32Intrinsics) return NULL;
+ break;
+
default:
assert(id <= vmIntrinsics::LAST_COMPILER_INLINE, "caller responsibility");
assert(id != vmIntrinsics::_Object_init && id != vmIntrinsics::_invoke, "enum out of order?");
@@ -807,6 +816,13 @@
case vmIntrinsics::_encodeISOArray:
return inline_encodeISOArray();
+ case vmIntrinsics::_updateCRC32:
+ return inline_updateCRC32();
+ case vmIntrinsics::_updateBytesCRC32:
+ return inline_updateBytesCRC32();
+ case vmIntrinsics::_updateByteBufferCRC32:
+ return inline_updateByteBufferCRC32();
+
default:
// If you get here, it may be that someone has added a new intrinsic
// to the list in vmSymbols.hpp without implementing it here.
@@ -884,7 +900,7 @@
IfNode* iff = create_and_map_if(control(), test, true_prob, COUNT_UNKNOWN);
- Node* if_slow = _gvn.transform( new (C) IfTrueNode(iff) );
+ Node* if_slow = _gvn.transform(new (C) IfTrueNode(iff));
if (if_slow == top()) {
// The slow branch is never taken. No need to build this guard.
return NULL;
@@ -893,7 +909,7 @@
if (region != NULL)
region->add_req(if_slow);
- Node* if_fast = _gvn.transform( new (C) IfFalseNode(iff) );
+ Node* if_fast = _gvn.transform(new (C) IfFalseNode(iff));
set_control(if_fast);
return if_slow;
@@ -912,8 +928,8 @@
return NULL; // already stopped
if (_gvn.type(index)->higher_equal(TypeInt::POS)) // [0,maxint]
return NULL; // index is already adequately typed
- Node* cmp_lt = _gvn.transform( new (C) CmpINode(index, intcon(0)) );
- Node* bol_lt = _gvn.transform( new (C) BoolNode(cmp_lt, BoolTest::lt) );
+ Node* cmp_lt = _gvn.transform(new (C) CmpINode(index, intcon(0)));
+ Node* bol_lt = _gvn.transform(new (C) BoolNode(cmp_lt, BoolTest::lt));
Node* is_neg = generate_guard(bol_lt, region, PROB_MIN);
if (is_neg != NULL && pos_index != NULL) {
// Emulate effect of Parse::adjust_map_after_if.
@@ -930,9 +946,9 @@
return NULL; // already stopped
if (_gvn.type(index)->higher_equal(TypeInt::POS1)) // [1,maxint]
return NULL; // index is already adequately typed
- Node* cmp_le = _gvn.transform( new (C) CmpINode(index, intcon(0)) );
+ Node* cmp_le = _gvn.transform(new (C) CmpINode(index, intcon(0)));
BoolTest::mask le_or_eq = (never_negative ? BoolTest::eq : BoolTest::le);
- Node* bol_le = _gvn.transform( new (C) BoolNode(cmp_le, le_or_eq) );
+ Node* bol_le = _gvn.transform(new (C) BoolNode(cmp_le, le_or_eq));
Node* is_notp = generate_guard(bol_le, NULL, PROB_MIN);
if (is_notp != NULL && pos_index != NULL) {
// Emulate effect of Parse::adjust_map_after_if.
@@ -968,9 +984,9 @@
return NULL; // common case of whole-array copy
Node* last = subseq_length;
if (!zero_offset) // last += offset
- last = _gvn.transform( new (C) AddINode(last, offset));
- Node* cmp_lt = _gvn.transform( new (C) CmpUNode(array_length, last) );
- Node* bol_lt = _gvn.transform( new (C) BoolNode(cmp_lt, BoolTest::lt) );
+ last = _gvn.transform(new (C) AddINode(last, offset));
+ Node* cmp_lt = _gvn.transform(new (C) CmpUNode(array_length, last));
+ Node* bol_lt = _gvn.transform(new (C) BoolNode(cmp_lt, BoolTest::lt));
Node* is_over = generate_guard(bol_lt, region, PROB_MIN);
return is_over;
}
@@ -1151,8 +1167,8 @@
Node* argument_cnt = load_String_length(no_ctrl, argument);
// Check for receiver count != argument count
- Node* cmp = _gvn.transform( new(C) CmpINode(receiver_cnt, argument_cnt) );
- Node* bol = _gvn.transform( new(C) BoolNode(cmp, BoolTest::ne) );
+ Node* cmp = _gvn.transform(new(C) CmpINode(receiver_cnt, argument_cnt));
+ Node* bol = _gvn.transform(new(C) BoolNode(cmp, BoolTest::ne));
Node* if_ne = generate_slow_guard(bol, NULL);
if (if_ne != NULL) {
phi->init_req(4, intcon(0));
@@ -1258,7 +1274,7 @@
Node* sourceOffset = load_String_offset(no_ctrl, string_object);
Node* sourceCount = load_String_length(no_ctrl, string_object);
- Node* target = _gvn.transform( makecon(TypeOopPtr::make_from_constant(target_array, true)) );
+ Node* target = _gvn.transform( makecon(TypeOopPtr::make_from_constant(target_array, true)));
jint target_length = target_array->length();
const TypeAry* target_array_type = TypeAry::make(TypeInt::CHAR, TypeInt::make(0, target_length, Type::WidenMin));
const TypeAryPtr* target_type = TypeAryPtr::make(TypePtr::BotPTR, target_array_type, target_array->klass(), true, Type::OffsetBot);
@@ -1365,8 +1381,8 @@
Node* substr_cnt = load_String_length(no_ctrl, arg);
// Check for substr count > string count
- Node* cmp = _gvn.transform( new(C) CmpINode(substr_cnt, source_cnt) );
- Node* bol = _gvn.transform( new(C) BoolNode(cmp, BoolTest::gt) );
+ Node* cmp = _gvn.transform(new(C) CmpINode(substr_cnt, source_cnt));
+ Node* bol = _gvn.transform(new(C) BoolNode(cmp, BoolTest::gt));
Node* if_gt = generate_slow_guard(bol, NULL);
if (if_gt != NULL) {
result_phi->init_req(2, intcon(-1));
@@ -1375,8 +1391,8 @@
if (!stopped()) {
// Check for substr count == 0
- cmp = _gvn.transform( new(C) CmpINode(substr_cnt, intcon(0)) );
- bol = _gvn.transform( new(C) BoolNode(cmp, BoolTest::eq) );
+ cmp = _gvn.transform(new(C) CmpINode(substr_cnt, intcon(0)));
+ bol = _gvn.transform(new(C) BoolNode(cmp, BoolTest::eq));
Node* if_zero = generate_slow_guard(bol, NULL);
if (if_zero != NULL) {
result_phi->init_req(3, intcon(0));
@@ -1552,7 +1568,7 @@
// Check PI/4 : abs(arg)
Node *cmp = _gvn.transform(new (C) CmpDNode(pi4,abs));
// Check: If PI/4 < abs(arg) then go slow
- Node *bol = _gvn.transform( new (C) BoolNode( cmp, BoolTest::lt ) );
+ Node *bol = _gvn.transform(new (C) BoolNode( cmp, BoolTest::lt ));
// Branch either way
IfNode *iff = create_and_xform_if(control(),bol, PROB_STATIC_FREQUENT, COUNT_UNKNOWN);
set_control(opt_iff(r,iff));
@@ -1617,8 +1633,8 @@
// to the runtime to properly handle corner cases
IfNode* iff = create_and_xform_if(control(), bolisnum, PROB_STATIC_FREQUENT, COUNT_UNKNOWN);
- Node* if_slow = _gvn.transform( new (C) IfFalseNode(iff) );
- Node* if_fast = _gvn.transform( new (C) IfTrueNode(iff) );
+ Node* if_slow = _gvn.transform(new (C) IfFalseNode(iff));
+ Node* if_fast = _gvn.transform(new (C) IfTrueNode(iff));
if (!if_slow->is_top()) {
RegionNode* result_region = new (C) RegionNode(3);
@@ -1704,42 +1720,42 @@
// Check x:0
Node *cmp = _gvn.transform(new (C) CmpDNode(x, zeronode));
// Check: If (x<=0) then go complex path
- Node *bol1 = _gvn.transform( new (C) BoolNode( cmp, BoolTest::le ) );
+ Node *bol1 = _gvn.transform(new (C) BoolNode( cmp, BoolTest::le ));
// Branch either way
IfNode *if1 = create_and_xform_if(control(),bol1, PROB_STATIC_INFREQUENT, COUNT_UNKNOWN);
// Fast path taken; set region slot 3
- Node *fast_taken = _gvn.transform( new (C) IfFalseNode(if1) );
+ Node *fast_taken = _gvn.transform(new (C) IfFalseNode(if1));
r->init_req(3,fast_taken); // Capture fast-control
// Fast path not-taken, i.e. slow path
- Node *complex_path = _gvn.transform( new (C) IfTrueNode(if1) );
+ Node *complex_path = _gvn.transform(new (C) IfTrueNode(if1));
// Set fast path result
- Node *fast_result = _gvn.transform( new (C) PowDNode(C, control(), x, y) );
+ Node *fast_result = _gvn.transform(new (C) PowDNode(C, control(), x, y));
phi->init_req(3, fast_result);
// Complex path
// Build the second if node (if y is long)
// Node for (long)y
- Node *longy = _gvn.transform( new (C) ConvD2LNode(y));
+ Node *longy = _gvn.transform(new (C) ConvD2LNode(y));
// Node for (double)((long) y)
- Node *doublelongy= _gvn.transform( new (C) ConvL2DNode(longy));
+ Node *doublelongy= _gvn.transform(new (C) ConvL2DNode(longy));
// Check (double)((long) y) : y
Node *cmplongy= _gvn.transform(new (C) CmpDNode(doublelongy, y));
// Check if (y isn't long) then go to slow path
- Node *bol2 = _gvn.transform( new (C) BoolNode( cmplongy, BoolTest::ne ) );
+ Node *bol2 = _gvn.transform(new (C) BoolNode( cmplongy, BoolTest::ne ));
// Branch either way
IfNode *if2 = create_and_xform_if(complex_path,bol2, PROB_STATIC_INFREQUENT, COUNT_UNKNOWN);
- Node* ylong_path = _gvn.transform( new (C) IfFalseNode(if2));
-
- Node *slow_path = _gvn.transform( new (C) IfTrueNode(if2) );
+ Node* ylong_path = _gvn.transform(new (C) IfFalseNode(if2));
+
+ Node *slow_path = _gvn.transform(new (C) IfTrueNode(if2));
// Calculate DPow(abs(x), y)*(1 & (long)y)
// Node for constant 1
Node *conone = longcon(1);
// 1& (long)y
- Node *signnode= _gvn.transform( new (C) AndLNode(conone, longy) );
+ Node *signnode= _gvn.transform(new (C) AndLNode(conone, longy));
// A huge number is always even. Detect a huge number by checking
// if y + 1 == y and set integer to be tested for parity to 0.
@@ -1747,9 +1763,9 @@
// (long)9.223372036854776E18 = max_jlong
// (double)(long)9.223372036854776E18 = 9.223372036854776E18
// max_jlong is odd but 9.223372036854776E18 is even
- Node* yplus1 = _gvn.transform( new (C) AddDNode(y, makecon(TypeD::make(1))));
+ Node* yplus1 = _gvn.transform(new (C) AddDNode(y, makecon(TypeD::make(1))));
Node *cmpyplus1= _gvn.transform(new (C) CmpDNode(yplus1, y));
- Node *bolyplus1 = _gvn.transform( new (C) BoolNode( cmpyplus1, BoolTest::eq ) );
+ Node *bolyplus1 = _gvn.transform(new (C) BoolNode( cmpyplus1, BoolTest::eq ));
Node* correctedsign = NULL;
if (ConditionalMoveLimit != 0) {
correctedsign = _gvn.transform( CMoveNode::make(C, NULL, bolyplus1, signnode, longcon(0), TypeLong::LONG));
@@ -1757,8 +1773,8 @@
IfNode *ifyplus1 = create_and_xform_if(ylong_path,bolyplus1, PROB_FAIR, COUNT_UNKNOWN);
RegionNode *r = new (C) RegionNode(3);
Node *phi = new (C) PhiNode(r, TypeLong::LONG);
- r->init_req(1, _gvn.transform( new (C) IfFalseNode(ifyplus1)));
- r->init_req(2, _gvn.transform( new (C) IfTrueNode(ifyplus1)));
+ r->init_req(1, _gvn.transform(new (C) IfFalseNode(ifyplus1)));
+ r->init_req(2, _gvn.transform(new (C) IfTrueNode(ifyplus1)));
phi->init_req(1, signnode);
phi->init_req(2, longcon(0));
correctedsign = _gvn.transform(phi);
@@ -1771,11 +1787,11 @@
// Check (1&(long)y)==0?
Node *cmpeq1 = _gvn.transform(new (C) CmpLNode(correctedsign, conzero));
// Check if (1&(long)y)!=0?, if so the result is negative
- Node *bol3 = _gvn.transform( new (C) BoolNode( cmpeq1, BoolTest::ne ) );
+ Node *bol3 = _gvn.transform(new (C) BoolNode( cmpeq1, BoolTest::ne ));
// abs(x)
- Node *absx=_gvn.transform( new (C) AbsDNode(x));
+ Node *absx=_gvn.transform(new (C) AbsDNode(x));
// abs(x)^y
- Node *absxpowy = _gvn.transform( new (C) PowDNode(C, control(), absx, y) );
+ Node *absxpowy = _gvn.transform(new (C) PowDNode(C, control(), absx, y));
// -abs(x)^y
Node *negabsxpowy = _gvn.transform(new (C) NegDNode (absxpowy));
// (1&(long)y)==1?-DPow(abs(x), y):DPow(abs(x), y)
@@ -1786,8 +1802,8 @@
IfNode *ifyeven = create_and_xform_if(ylong_path,bol3, PROB_FAIR, COUNT_UNKNOWN);
RegionNode *r = new (C) RegionNode(3);
Node *phi = new (C) PhiNode(r, Type::DOUBLE);
- r->init_req(1, _gvn.transform( new (C) IfFalseNode(ifyeven)));
- r->init_req(2, _gvn.transform( new (C) IfTrueNode(ifyeven)));
+ r->init_req(1, _gvn.transform(new (C) IfFalseNode(ifyeven)));
+ r->init_req(2, _gvn.transform(new (C) IfTrueNode(ifyeven)));
phi->init_req(1, absxpowy);
phi->init_req(2, negabsxpowy);
signresult = _gvn.transform(phi);
@@ -1920,7 +1936,7 @@
int cmp_op = Op_CmpI;
Node* xkey = xvalue;
Node* ykey = yvalue;
- Node* ideal_cmpxy = _gvn.transform( new(C) CmpINode(xkey, ykey) );
+ Node* ideal_cmpxy = _gvn.transform(new(C) CmpINode(xkey, ykey));
if (ideal_cmpxy->is_Cmp()) {
// E.g., if we have CmpI(length - offset, count),
// it might idealize to CmpI(length, count + offset)
@@ -2013,7 +2029,7 @@
default:
if (cmpxy == NULL)
cmpxy = ideal_cmpxy;
- best_bol = _gvn.transform( new(C) BoolNode(cmpxy, BoolTest::lt) );
+ best_bol = _gvn.transform(new(C) BoolNode(cmpxy, BoolTest::lt));
// and fall through:
case BoolTest::lt: // x < y
case BoolTest::le: // x <= y
@@ -2073,7 +2089,7 @@
return Type::AnyPtr;
} else if (base_type == TypePtr::NULL_PTR) {
// Since this is a NULL+long form, we have to switch to a rawptr.
- base = _gvn.transform( new (C) CastX2PNode(offset) );
+ base = _gvn.transform(new (C) CastX2PNode(offset));
offset = MakeConX(0);
return Type::RawPtr;
} else if (base_type->base() == Type::RawPtr) {
@@ -2467,7 +2483,7 @@
case T_ADDRESS:
// Repackage the long as a pointer.
val = ConvL2X(val);
- val = _gvn.transform( new (C) CastX2PNode(val) );
+ val = _gvn.transform(new (C) CastX2PNode(val));
break;
}
@@ -2775,7 +2791,7 @@
// SCMemProjNodes represent the memory state of a LoadStore. Their
// main role is to prevent LoadStore nodes from being optimized away
// when their results aren't used.
- Node* proj = _gvn.transform( new (C) SCMemProjNode(load_store));
+ Node* proj = _gvn.transform(new (C) SCMemProjNode(load_store));
set_memory(proj, alias_idx);
// Add the trailing membar surrounding the access
@@ -3010,8 +3026,8 @@
Node* rec_thr = argument(0);
Node* tls_ptr = NULL;
Node* cur_thr = generate_current_thread(tls_ptr);
- Node* cmp_thr = _gvn.transform( new (C) CmpPNode(cur_thr, rec_thr) );
- Node* bol_thr = _gvn.transform( new (C) BoolNode(cmp_thr, BoolTest::ne) );
+ Node* cmp_thr = _gvn.transform(new (C) CmpPNode(cur_thr, rec_thr));
+ Node* bol_thr = _gvn.transform(new (C) BoolNode(cmp_thr, BoolTest::ne));
generate_slow_guard(bol_thr, slow_region);
@@ -3022,36 +3038,36 @@
// Set the control input on the field _interrupted read to prevent it floating up.
Node* int_bit = make_load(control(), p, TypeInt::BOOL, T_INT);
- Node* cmp_bit = _gvn.transform( new (C) CmpINode(int_bit, intcon(0)) );
- Node* bol_bit = _gvn.transform( new (C) BoolNode(cmp_bit, BoolTest::ne) );
+ Node* cmp_bit = _gvn.transform(new (C) CmpINode(int_bit, intcon(0)));
+ Node* bol_bit = _gvn.transform(new (C) BoolNode(cmp_bit, BoolTest::ne));
IfNode* iff_bit = create_and_map_if(control(), bol_bit, PROB_UNLIKELY_MAG(3), COUNT_UNKNOWN);
// First fast path: if (!TLS._interrupted) return false;
- Node* false_bit = _gvn.transform( new (C) IfFalseNode(iff_bit) );
+ Node* false_bit = _gvn.transform(new (C) IfFalseNode(iff_bit));
result_rgn->init_req(no_int_result_path, false_bit);
result_val->init_req(no_int_result_path, intcon(0));
// drop through to next case
- set_control( _gvn.transform(new (C) IfTrueNode(iff_bit)) );
+ set_control( _gvn.transform(new (C) IfTrueNode(iff_bit)));
// (c) Or, if interrupt bit is set and clear_int is false, use 2nd fast path.
Node* clr_arg = argument(1);
- Node* cmp_arg = _gvn.transform( new (C) CmpINode(clr_arg, intcon(0)) );
- Node* bol_arg = _gvn.transform( new (C) BoolNode(cmp_arg, BoolTest::ne) );
+ Node* cmp_arg = _gvn.transform(new (C) CmpINode(clr_arg, intcon(0)));
+ Node* bol_arg = _gvn.transform(new (C) BoolNode(cmp_arg, BoolTest::ne));
IfNode* iff_arg = create_and_map_if(control(), bol_arg, PROB_FAIR, COUNT_UNKNOWN);
// Second fast path: ... else if (!clear_int) return true;
- Node* false_arg = _gvn.transform( new (C) IfFalseNode(iff_arg) );
+ Node* false_arg = _gvn.transform(new (C) IfFalseNode(iff_arg));
result_rgn->init_req(no_clear_result_path, false_arg);
result_val->init_req(no_clear_result_path, intcon(1));
// drop through to next case
- set_control( _gvn.transform(new (C) IfTrueNode(iff_arg)) );
+ set_control( _gvn.transform(new (C) IfTrueNode(iff_arg)));
// (d) Otherwise, go to the slow path.
slow_region->add_req(control());
- set_control( _gvn.transform(slow_region) );
+ set_control( _gvn.transform(slow_region));
if (stopped()) {
// There is no slow path.
@@ -3107,7 +3123,7 @@
if (region == NULL) never_see_null = true;
Node* p = basic_plus_adr(mirror, offset);
const TypeKlassPtr* kls_type = TypeKlassPtr::OBJECT_OR_NULL;
- Node* kls = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), p, TypeRawPtr::BOTTOM, kls_type) );
+ Node* kls = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), p, TypeRawPtr::BOTTOM, kls_type));
Node* null_ctl = top();
kls = null_check_oop(kls, &null_ctl, never_see_null);
if (region != NULL) {
@@ -3129,9 +3145,9 @@
Node* mods = make_load(NULL, modp, TypeInt::INT, T_INT);
Node* mask = intcon(modifier_mask);
Node* bits = intcon(modifier_bits);
- Node* mbit = _gvn.transform( new (C) AndINode(mods, mask) );
- Node* cmp = _gvn.transform( new (C) CmpINode(mbit, bits) );
- Node* bol = _gvn.transform( new (C) BoolNode(cmp, BoolTest::ne) );
+ Node* mbit = _gvn.transform(new (C) AndINode(mods, mask));
+ Node* cmp = _gvn.transform(new (C) CmpINode(mbit, bits));
+ Node* bol = _gvn.transform(new (C) BoolNode(cmp, BoolTest::ne));
return generate_fair_guard(bol, region);
}
Node* LibraryCallKit::generate_interface_guard(Node* kls, RegionNode* region) {
@@ -3282,7 +3298,7 @@
phi->add_req(makecon(TypeInstPtr::make(env()->Object_klass()->java_mirror())));
// If we fall through, it's a plain class. Get its _super.
p = basic_plus_adr(kls, in_bytes(Klass::super_offset()));
- kls = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), p, TypeRawPtr::BOTTOM, TypeKlassPtr::OBJECT_OR_NULL) );
+ kls = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), p, TypeRawPtr::BOTTOM, TypeKlassPtr::OBJECT_OR_NULL));
null_ctl = top();
kls = null_check_oop(kls, &null_ctl);
if (null_ctl != top()) {
@@ -3395,8 +3411,8 @@
set_control(region->in(_prim_0_path)); // go back to first null check
if (!stopped()) {
// Since superc is primitive, make a guard for the superc==subc case.
- Node* cmp_eq = _gvn.transform( new (C) CmpPNode(args[0], args[1]) );
- Node* bol_eq = _gvn.transform( new (C) BoolNode(cmp_eq, BoolTest::eq) );
+ Node* cmp_eq = _gvn.transform(new (C) CmpPNode(args[0], args[1]));
+ Node* bol_eq = _gvn.transform(new (C) BoolNode(cmp_eq, BoolTest::eq));
generate_guard(bol_eq, region, PROB_FAIR);
if (region->req() == PATH_LIMIT+1) {
// A guard was added. If the added guard is taken, superc==subc.
@@ -3461,11 +3477,11 @@
? ((jint)Klass::_lh_array_tag_type_value
<< Klass::_lh_array_tag_shift)
: Klass::_lh_neutral_value);
- Node* cmp = _gvn.transform( new(C) CmpINode(layout_val, intcon(nval)) );
+ Node* cmp = _gvn.transform(new(C) CmpINode(layout_val, intcon(nval)));
BoolTest::mask btest = BoolTest::lt; // correct for testing is_[obj]array
// invert the test if we are looking for a non-array
if (not_array) btest = BoolTest(btest).negate();
- Node* bol = _gvn.transform( new(C) BoolNode(cmp, btest) );
+ Node* bol = _gvn.transform(new(C) BoolNode(cmp, btest));
return generate_fair_guard(bol, region);
}
@@ -3525,7 +3541,7 @@
// Return the combined state.
set_i_o( _gvn.transform(result_io) );
- set_all_memory( _gvn.transform(result_mem) );
+ set_all_memory( _gvn.transform(result_mem));
C->set_has_split_ifs(true); // Has chance for split-if optimization
set_result(result_reg, result_val);
@@ -3678,8 +3694,8 @@
const TypePtr* native_call_addr = TypeMetadataPtr::make(method);
Node* native_call = makecon(native_call_addr);
- Node* chk_native = _gvn.transform( new(C) CmpPNode(target_call, native_call) );
- Node* test_native = _gvn.transform( new(C) BoolNode(chk_native, BoolTest::ne) );
+ Node* chk_native = _gvn.transform(new(C) CmpPNode(target_call, native_call));
+ Node* test_native = _gvn.transform(new(C) BoolNode(chk_native, BoolTest::ne));
return generate_slow_guard(test_native, slow_region);
}
@@ -3800,10 +3816,10 @@
// Test the header to see if it is unlocked.
Node *lock_mask = _gvn.MakeConX(markOopDesc::biased_lock_mask_in_place);
- Node *lmasked_header = _gvn.transform( new (C) AndXNode(header, lock_mask) );
+ Node *lmasked_header = _gvn.transform(new (C) AndXNode(header, lock_mask));
Node *unlocked_val = _gvn.MakeConX(markOopDesc::unlocked_value);
- Node *chk_unlocked = _gvn.transform( new (C) CmpXNode( lmasked_header, unlocked_val));
- Node *test_unlocked = _gvn.transform( new (C) BoolNode( chk_unlocked, BoolTest::ne) );
+ Node *chk_unlocked = _gvn.transform(new (C) CmpXNode( lmasked_header, unlocked_val));
+ Node *test_unlocked = _gvn.transform(new (C) BoolNode( chk_unlocked, BoolTest::ne));
generate_slow_guard(test_unlocked, slow_region);
@@ -3813,17 +3829,17 @@
// vm: see markOop.hpp.
Node *hash_mask = _gvn.intcon(markOopDesc::hash_mask);
Node *hash_shift = _gvn.intcon(markOopDesc::hash_shift);
- Node *hshifted_header= _gvn.transform( new (C) URShiftXNode(header, hash_shift) );
+ Node *hshifted_header= _gvn.transform(new (C) URShiftXNode(header, hash_shift));
// This hack lets the hash bits live anywhere in the mark object now, as long
// as the shift drops the relevant bits into the low 32 bits. Note that
// Java spec says that HashCode is an int so there's no point in capturing
// an 'X'-sized hashcode (32 in 32-bit build or 64 in 64-bit build).
hshifted_header = ConvX2I(hshifted_header);
- Node *hash_val = _gvn.transform( new (C) AndINode(hshifted_header, hash_mask) );
+ Node *hash_val = _gvn.transform(new (C) AndINode(hshifted_header, hash_mask));
Node *no_hash_val = _gvn.intcon(markOopDesc::no_hash);
- Node *chk_assigned = _gvn.transform( new (C) CmpINode( hash_val, no_hash_val));
- Node *test_assigned = _gvn.transform( new (C) BoolNode( chk_assigned, BoolTest::eq) );
+ Node *chk_assigned = _gvn.transform(new (C) CmpINode( hash_val, no_hash_val));
+ Node *test_assigned = _gvn.transform(new (C) BoolNode( chk_assigned, BoolTest::eq));
generate_slow_guard(test_assigned, slow_region);
@@ -3854,7 +3870,7 @@
// Return the combined state.
set_i_o( _gvn.transform(result_io) );
- set_all_memory( _gvn.transform(result_mem) );
+ set_all_memory( _gvn.transform(result_mem));
set_result(result_reg, result_val);
return true;
@@ -3982,7 +3998,7 @@
Node *opt_isnan = _gvn.transform(ifisnan);
assert( opt_isnan->is_If(), "Expect an IfNode");
IfNode *opt_ifisnan = (IfNode*)opt_isnan;
- Node *iftrue = _gvn.transform( new (C) IfTrueNode(opt_ifisnan) );
+ Node *iftrue = _gvn.transform(new (C) IfTrueNode(opt_ifisnan));
set_control(iftrue);
@@ -4023,7 +4039,7 @@
Node *opt_isnan = _gvn.transform(ifisnan);
assert( opt_isnan->is_If(), "Expect an IfNode");
IfNode *opt_ifisnan = (IfNode*)opt_isnan;
- Node *iftrue = _gvn.transform( new (C) IfTrueNode(opt_ifisnan) );
+ Node *iftrue = _gvn.transform(new (C) IfTrueNode(opt_ifisnan));
set_control(iftrue);
@@ -4152,8 +4168,8 @@
// Compute the length also, if needed:
Node* countx = size;
- countx = _gvn.transform( new (C) SubXNode(countx, MakeConX(base_off)) );
- countx = _gvn.transform( new (C) URShiftXNode(countx, intcon(LogBytesPerLong) ));
+ countx = _gvn.transform(new (C) SubXNode(countx, MakeConX(base_off)));
+ countx = _gvn.transform(new (C) URShiftXNode(countx, intcon(LogBytesPerLong) ));
const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
bool disjoint_bases = true;
@@ -4357,9 +4373,9 @@
}
// Return the combined state.
- set_control( _gvn.transform(result_reg) );
- set_i_o( _gvn.transform(result_i_o) );
- set_all_memory( _gvn.transform(result_mem) );
+ set_control( _gvn.transform(result_reg));
+ set_i_o( _gvn.transform(result_i_o));
+ set_all_memory( _gvn.transform(result_mem));
} // original reexecute is set back here
set_result(_gvn.transform(result_val));
@@ -4684,8 +4700,8 @@
// are dest_head = dest[0..off] and dest_tail = dest[off+len..dest.length].
Node* dest_size = alloc->in(AllocateNode::AllocSize);
Node* dest_length = alloc->in(AllocateNode::ALength);
- Node* dest_tail = _gvn.transform( new(C) AddINode(dest_offset,
- copy_length) );
+ Node* dest_tail = _gvn.transform(new(C) AddINode(dest_offset,
+ copy_length));
// If there is a head section that needs zeroing, do it now.
if (find_int_con(dest_offset, -1) != 0) {
@@ -4701,8 +4717,8 @@
// the copy to a more hardware-friendly word size of 64 bits.
Node* tail_ctl = NULL;
if (!stopped() && !dest_tail->eqv_uncast(dest_length)) {
- Node* cmp_lt = _gvn.transform( new(C) CmpINode(dest_tail, dest_length) );
- Node* bol_lt = _gvn.transform( new(C) BoolNode(cmp_lt, BoolTest::lt) );
+ Node* cmp_lt = _gvn.transform(new(C) CmpINode(dest_tail, dest_length));
+ Node* bol_lt = _gvn.transform(new(C) BoolNode(cmp_lt, BoolTest::lt));
tail_ctl = generate_slow_guard(bol_lt, NULL);
assert(tail_ctl != NULL || !stopped(), "must be an outcome");
}
@@ -4745,7 +4761,7 @@
dest_size);
done_ctl->init_req(2, control());
done_mem->init_req(2, memory(adr_type));
- set_control( _gvn.transform(done_ctl) );
+ set_control( _gvn.transform(done_ctl));
set_memory( _gvn.transform(done_mem), adr_type );
}
}
@@ -4832,18 +4848,18 @@
// Clean up after the checked call.
// The returned value is either 0 or -1^K,
// where K = number of partially transferred array elements.
- Node* cmp = _gvn.transform( new(C) CmpINode(checked_value, intcon(0)) );
- Node* bol = _gvn.transform( new(C) BoolNode(cmp, BoolTest::eq) );
+ Node* cmp = _gvn.transform(new(C) CmpINode(checked_value, intcon(0)));
+ Node* bol = _gvn.transform(new(C) BoolNode(cmp, BoolTest::eq));
IfNode* iff = create_and_map_if(control(), bol, PROB_MAX, COUNT_UNKNOWN);
// If it is 0, we are done, so transfer to the end.
- Node* checks_done = _gvn.transform( new(C) IfTrueNode(iff) );
+ Node* checks_done = _gvn.transform(new(C) IfTrueNode(iff));
result_region->init_req(checked_path, checks_done);
result_i_o ->init_req(checked_path, checked_i_o);
result_memory->init_req(checked_path, checked_mem);
// If it is not zero, merge into the slow call.
- set_control( _gvn.transform( new(C) IfFalseNode(iff) ));
+ set_control( _gvn.transform(new(C) IfFalseNode(iff) ));
RegionNode* slow_reg2 = new(C) RegionNode(3);
PhiNode* slow_i_o2 = new(C) PhiNode(slow_reg2, Type::ABIO);
PhiNode* slow_mem2 = new(C) PhiNode(slow_reg2, Type::MEMORY, adr_type);
@@ -4866,16 +4882,16 @@
} else {
// We must continue the copy exactly where it failed, or else
// another thread might see the wrong number of writes to dest.
- Node* checked_offset = _gvn.transform( new(C) XorINode(checked_value, intcon(-1)) );
+ Node* checked_offset = _gvn.transform(new(C) XorINode(checked_value, intcon(-1)));
Node* slow_offset = new(C) PhiNode(slow_reg2, TypeInt::INT);
slow_offset->init_req(1, intcon(0));
slow_offset->init_req(2, checked_offset);
slow_offset = _gvn.transform(slow_offset);
// Adjust the arguments by the conditionally incoming offset.
- Node* src_off_plus = _gvn.transform( new(C) AddINode(src_offset, slow_offset) );
- Node* dest_off_plus = _gvn.transform( new(C) AddINode(dest_offset, slow_offset) );
- Node* length_minus = _gvn.transform( new(C) SubINode(copy_length, slow_offset) );
+ Node* src_off_plus = _gvn.transform(new(C) AddINode(src_offset, slow_offset));
+ Node* dest_off_plus = _gvn.transform(new(C) AddINode(dest_offset, slow_offset));
+ Node* length_minus = _gvn.transform(new(C) SubINode(copy_length, slow_offset));
// Tweak the node variables to adjust the code produced below:
src_offset = src_off_plus;
@@ -4914,7 +4930,7 @@
}
// Finished; return the combined state.
- set_control( _gvn.transform(result_region) );
+ set_control( _gvn.transform(result_region));
set_i_o( _gvn.transform(result_i_o) );
set_memory( _gvn.transform(result_memory), adr_type );
@@ -5096,10 +5112,10 @@
int end_round = (-1 << scale) & (BytesPerLong - 1);
Node* end = ConvI2X(slice_len);
if (scale != 0)
- end = _gvn.transform( new(C) LShiftXNode(end, intcon(scale) ));
+ end = _gvn.transform(new(C) LShiftXNode(end, intcon(scale) ));
end_base += end_round;
- end = _gvn.transform( new(C) AddXNode(end, MakeConX(end_base)) );
- end = _gvn.transform( new(C) AndXNode(end, MakeConX(~end_round)) );
+ end = _gvn.transform(new(C) AddXNode(end, MakeConX(end_base)));
+ end = _gvn.transform(new(C) AndXNode(end, MakeConX(~end_round)));
mem = ClearArrayNode::clear_memory(control(), mem, dest,
start_con, end, &_gvn);
} else if (start_con < 0 && dest_size != top()) {
@@ -5108,8 +5124,8 @@
Node* start = slice_idx;
start = ConvI2X(start);
if (scale != 0)
- start = _gvn.transform( new(C) LShiftXNode( start, intcon(scale) ));
- start = _gvn.transform( new(C) AddXNode(start, MakeConX(abase)) );
+ start = _gvn.transform(new(C) LShiftXNode( start, intcon(scale) ));
+ start = _gvn.transform(new(C) AddXNode(start, MakeConX(abase)));
if ((bump_bit | clear_low) != 0) {
int to_clear = (bump_bit | clear_low);
// Align up mod 8, then store a jint zero unconditionally
@@ -5120,14 +5136,14 @@
assert((abase & to_clear) == 0, "array base must be long-aligned");
} else {
// Bump 'start' up to (or past) the next jint boundary:
- start = _gvn.transform( new(C) AddXNode(start, MakeConX(bump_bit)) );
+ start = _gvn.transform(new(C) AddXNode(start, MakeConX(bump_bit)));
assert((abase & clear_low) == 0, "array base must be int-aligned");
}
// Round bumped 'start' down to jlong boundary in body of array.
- start = _gvn.transform( new(C) AndXNode(start, MakeConX(~to_clear)) );
+ start = _gvn.transform(new(C) AndXNode(start, MakeConX(~to_clear)));
if (bump_bit != 0) {
// Store a zero to the immediately preceding jint:
- Node* x1 = _gvn.transform( new(C) AddXNode(start, MakeConX(-bump_bit)) );
+ Node* x1 = _gvn.transform(new(C) AddXNode(start, MakeConX(-bump_bit)));
Node* p1 = basic_plus_adr(dest, x1);
mem = StoreNode::make(_gvn, control(), mem, p1, adr_type, intcon(0), T_INT);
mem = _gvn.transform(mem);
@@ -5194,8 +5210,8 @@
Node* sptr = basic_plus_adr(src, src_off);
Node* dptr = basic_plus_adr(dest, dest_off);
Node* countx = dest_size;
- countx = _gvn.transform( new (C) SubXNode(countx, MakeConX(dest_off)) );
- countx = _gvn.transform( new (C) URShiftXNode(countx, intcon(LogBytesPerLong)) );
+ countx = _gvn.transform(new (C) SubXNode(countx, MakeConX(dest_off)));
+ countx = _gvn.transform(new (C) URShiftXNode(countx, intcon(LogBytesPerLong)));
bool disjoint_bases = true; // since alloc != NULL
generate_unchecked_arraycopy(adr_type, T_LONG, disjoint_bases,
@@ -5360,6 +5376,117 @@
return true;
}
+/**
+ * Intrinsic for int java.util.zip.CRC32.update(int crc, int b): folds one byte
+ * into the CRC with a lookup in the table at StubRoutines::crc_table_addr().
+ */
+bool LibraryCallKit::inline_updateCRC32() {
+  assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions support");
+  assert(callee()->signature()->size() == 2, "update has 2 parameters");
+  // no receiver since it is static method
+  Node* crc = argument(0); // type: int
+  Node* b   = argument(1); // type: int
+
+  /*
+   *    int c = ~ crc;
+   *    b = timesXtoThe32[(b ^ c) & 0xFF];
+   *    b = b ^ (c >>> 8);
+   *    crc = ~b;
+   */
+  // c = ~crc (xor with all ones); table index = (b ^ c) & 0xFF.
+  Node* M1 = intcon(-1);
+  crc = _gvn.transform(new (C) XorINode(crc, M1));
+  Node* result = _gvn.transform(new (C) XorINode(crc, b));
+  result = _gvn.transform(new (C) AndINode(result, intcon(0xFF)));
+  // Scale the index by 4 (shift by 2) and load a T_INT from the table.
+  Node* base = makecon(TypeRawPtr::make(StubRoutines::crc_table_addr()));
+  Node* offset = _gvn.transform(new (C) LShiftINode(result, intcon(0x2)));
+  Node* adr = basic_plus_adr(top(), base, ConvI2X(offset));
+  result = make_load(control(), adr, TypeInt::INT, T_INT);
+  // crc = ~(table[index] ^ (c >>> 8))
+  crc = _gvn.transform(new (C) URShiftINode(crc, intcon(8)));
+  result = _gvn.transform(new (C) XorINode(crc, result));
+  result = _gvn.transform(new (C) XorINode(result, M1));
+  set_result(result);
+  return true;
+}
+
+/**
+ * Intrinsic for int java.util.zip.CRC32.updateBytes(int crc, byte[] buf, int off, int len):
+ * calls the updateBytesCRC32 stub on &buf[off]. Returns false if src is not a known byte[].
+ */
+bool LibraryCallKit::inline_updateBytesCRC32() {
+  assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions support");
+  assert(callee()->signature()->size() == 4, "updateBytes has 4 parameters");
+  // no receiver since it is static method
+  Node* crc     = argument(0); // type: int
+  Node* src     = argument(1); // type: oop
+  Node* offset  = argument(2); // type: int
+  Node* length  = argument(3); // type: int
+
+  const Type* src_type = src->Value(&_gvn);
+  const TypeAryPtr* top_src = src_type->isa_aryptr();
+  if (top_src == NULL || top_src->klass() == NULL) {
+    // failed array check
+    return false;
+  }
+
+  // Only a byte[] source is handled; for any other element type give up (return false).
+  BasicType src_elem = top_src->klass()->as_array_klass()->element_type()->basic_type();
+  if (src_elem != T_BYTE) {
+    return false;
+  }
+
+  // 'src_start' points to src array + scaled offset
+  Node* src_start = array_element_address(src, offset, src_elem);
+
+  // We assume that range check is done by caller.
+  // TODO: generate range check (offset+length <= src.length) in debug VM.
+
+  // Call the stub.
+  address stubAddr = StubRoutines::updateBytesCRC32();
+  const char *stubName = "updateBytesCRC32";
+
+  Node* call = make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::updateBytesCRC32_Type(),
+                                 stubAddr, stubName, TypePtr::BOTTOM,
+                                 crc, src_start, length);
+  Node* result = _gvn.transform(new (C) ProjNode(call, TypeFunc::Parms));
+  set_result(result);
+  return true;
+}
+
+/**
+ * Intrinsic for int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len):
+ * calls the updateBytesCRC32 stub on the raw address buf + off.
+ */
+bool LibraryCallKit::inline_updateByteBufferCRC32() {
+  assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions support");
+  assert(callee()->signature()->size() == 5, "updateByteBuffer has 4 parameters and one is long");
+  // no receiver since it is static method
+  Node* crc     = argument(0); // type: int
+  Node* src     = argument(1); // type: long (takes two argument slots, hence 3 next)
+  Node* offset  = argument(3); // type: int
+  Node* length  = argument(4); // type: int
+
+  src = ConvL2X(src);  // adjust Java long to machine word
+  Node* base = _gvn.transform(new (C) CastX2PNode(src));
+  offset = ConvI2X(offset);
+
+  // 'src_start' is the raw buffer address plus the byte offset (no scaling applied)
+  Node* src_start = basic_plus_adr(top(), base, offset);
+
+  // Call the stub.
+  address stubAddr = StubRoutines::updateBytesCRC32();
+  const char *stubName = "updateBytesCRC32";
+
+  Node* call = make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::updateBytesCRC32_Type(),
+                                 stubAddr, stubName, TypePtr::BOTTOM,
+                                 crc, src_start, length);
+  Node* result = _gvn.transform(new (C) ProjNode(call, TypeFunc::Parms));
+  set_result(result);
+  return true;
+}
+
//----------------------------inline_reference_get----------------------------
// public T java.lang.ref.Reference.get();
bool LibraryCallKit::inline_reference_get() {
--- a/hotspot/src/share/vm/opto/runtime.cpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/share/vm/opto/runtime.cpp Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -829,6 +829,28 @@
return TypeFunc::make(domain, range);
}
+/**
+ * Call signature used for the updateBytesCRC32 stub:
+ *   int updateBytesCRC32(int crc, byte* b, int len)
+ */
+const TypeFunc* OptoRuntime::updateBytesCRC32_Type() {
+  // Domain: three arguments, filled in starting at index TypeFunc::Parms.
+  const int arg_count = 3;
+  const Type** in_fields = TypeTuple::fields(arg_count);
+  int slot = TypeFunc::Parms;
+  in_fields[slot++] = TypeInt::INT;     // crc
+  in_fields[slot++] = TypePtr::NOTNULL; // src
+  in_fields[slot++] = TypeInt::INT;     // len
+  assert(slot == TypeFunc::Parms + arg_count, "correct decoding");
+  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + arg_count, in_fields);
+
+  // Range: a single int result.
+  const Type** out_fields = TypeTuple::fields(1);
+  out_fields[TypeFunc::Parms + 0] = TypeInt::INT; // crc result
+  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms + 1, out_fields);
+  return TypeFunc::make(domain, range);
+}
+
// for cipherBlockChaining calls of aescrypt encrypt/decrypt, four pointers and a length, returning void
const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() {
// create input type (domain)
--- a/hotspot/src/share/vm/opto/runtime.hpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/share/vm/opto/runtime.hpp Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -284,6 +284,8 @@
static const TypeFunc* aescrypt_block_Type();
static const TypeFunc* cipherBlockChaining_aescrypt_Type();
+ static const TypeFunc* updateBytesCRC32_Type();
+
// leaf on stack replacement interpreter accessor types
static const TypeFunc* osr_end_Type();
--- a/hotspot/src/share/vm/runtime/globals.hpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/share/vm/runtime/globals.hpp Tue Jul 02 20:42:12 2013 -0400
@@ -644,6 +644,9 @@
product(bool, UseAESIntrinsics, false, \
"use intrinsics for AES versions of crypto") \
\
+ product(bool, UseCRC32Intrinsics, false, \
+ "use intrinsics for java.util.zip.CRC32") \
+ \
develop(bool, TraceCallFixup, false, \
"traces all call fixups") \
\
--- a/hotspot/src/share/vm/runtime/stubRoutines.cpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/share/vm/runtime/stubRoutines.cpp Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -125,6 +125,9 @@
address StubRoutines::_cipherBlockChaining_encryptAESCrypt = NULL;
address StubRoutines::_cipherBlockChaining_decryptAESCrypt = NULL;
+address StubRoutines::_updateBytesCRC32 = NULL; // CRC32 bulk-update stub entry, returned by updateBytesCRC32(); NULL by default
+address StubRoutines::_crc_table_adr = NULL; // CRC lookup table address, returned by crc_table_addr(); NULL by default
+
double (* StubRoutines::_intrinsic_log )(double) = NULL;
double (* StubRoutines::_intrinsic_log10 )(double) = NULL;
double (* StubRoutines::_intrinsic_exp )(double) = NULL;
--- a/hotspot/src/share/vm/runtime/stubRoutines.hpp Tue Jul 02 07:51:31 2013 +0200
+++ b/hotspot/src/share/vm/runtime/stubRoutines.hpp Tue Jul 02 20:42:12 2013 -0400
@@ -204,6 +204,9 @@
static address _cipherBlockChaining_encryptAESCrypt;
static address _cipherBlockChaining_decryptAESCrypt;
+ static address _updateBytesCRC32;
+ static address _crc_table_adr;
+
// These are versions of the java.lang.Math methods which perform
// the same operations as the intrinsic version. They are used for
// constant folding in the compiler to ensure equivalence. If the
@@ -342,6 +345,9 @@
static address cipherBlockChaining_encryptAESCrypt() { return _cipherBlockChaining_encryptAESCrypt; }
static address cipherBlockChaining_decryptAESCrypt() { return _cipherBlockChaining_decryptAESCrypt; }
+  static address updateBytesCRC32() { return _updateBytesCRC32; } // stub address the C2 updateBytes/updateByteBuffer intrinsics call
+  static address crc_table_addr() { return _crc_table_adr; } // table base the C2 single-byte CRC32 intrinsic loads from
+
static address select_fill_function(BasicType t, bool aligned, const char* &name);
static address zero_aligned_words() { return _zero_aligned_words; }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7088419/CRCTest.java Tue Jul 02 20:42:12 2013 -0400
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ @test
+ @bug 7088419
+ @run main CRCTest
+ @summary Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32 and java.util.zip.Adler32
+ */
+
+import java.nio.ByteBuffer;
+import java.util.zip.CRC32;
+import java.util.zip.Checksum;
+
+public class CRCTest {
+
+    // Four CRC32 instances, four update paths, same data: all must agree.
+    public static void main(String[] args) throws Exception {
+        final byte[] data = initializedBytes(4096 * 4096);
+        final CRC32 viaArray = new CRC32();   // update(byte[], int, int)
+        final CRC32 viaBytes = new CRC32();   // update(int), one byte at a time
+        final CRC32 viaBuffer = new CRC32();  // update(ByteBuffer) on a direct buffer
+        final CRC32 viaSlow = new CRC32();    // update(int) per byte, then getValue()
+
+        // The whole array in one go.
+        updateAllAndCheck(viaArray, viaBytes, viaBuffer, viaSlow, data, 0, data.length);
+
+        // One extra byte through the single-byte entry point.
+        viaArray.update(17);
+        viaBytes.update(17);
+        viaBuffer.update(17);
+        viaSlow.update(17);
+
+        // The array again, skipping the first byte and stopping one byte short.
+        updateAllAndCheck(viaArray, viaBytes, viaBuffer, viaSlow, data, 1, data.length - 2);
+        report("finished huge crc", viaArray, viaBytes, viaBuffer, viaSlow);
+
+        // Every start offset in [0, 256) paired with every length in [0, 256).
+        for (int start = 0; start < 256; start++) {
+            for (int len = 0; len < 256; len++) {
+                updateAllAndCheck(viaArray, viaBytes, viaBuffer, viaSlow, data, start, len);
+            }
+        }
+
+        report("finished small survey crc", viaArray, viaBytes, viaBuffer, viaSlow);
+    }
+
+    // Runs one byte range through all four update paths, then cross-checks the values.
+    private static void updateAllAndCheck(CRC32 viaArray, Checksum viaBytes,
+                                          CRC32 viaBuffer, Checksum viaSlow,
+                                          byte[] b, int start, int length) throws Exception {
+        viaArray.update(b, start, length);
+        updateSerial(viaBytes, b, start, length);
+        updateDirect(viaBuffer, b, start, length);
+        updateSerialSlow(viaSlow, b, start, length);
+        check(viaArray, viaBytes);
+        check(viaBuffer, viaSlow);
+        check(viaArray, viaBuffer);
+    }
+
+    // Prints the label followed by the current value of all four checksums.
+    private static void report(String s, Checksum crc1, Checksum crc2,
+                               Checksum crc3, Checksum crc4) {
+        StringBuilder line = new StringBuilder().append(s);
+        line.append(", crc1 = ").append(crc1.getValue());
+        line.append(", crc2 = ").append(crc2.getValue());
+        line.append(", crc3 = ").append(crc3.getValue());
+        line.append(", crc4 = ").append(crc4.getValue());
+        System.out.println(line);
+    }
+
+    // Logs to stderr and throws when the two checksum values differ.
+    private static void check(Checksum crc1, Checksum crc2) throws Exception {
+        if (crc1.getValue() == crc2.getValue()) {
+            return;
+        }
+        String s = "value 1 = " + crc1.getValue() + ", value 2 = " + crc2.getValue();
+        System.err.println(s);
+        throw new Exception(s);
+    }
+
+    // Returns an array of M bytes in which element i holds (byte) i.
+    private static byte[] initializedBytes(int M) {
+        byte[] bytes = new byte[M];
+        for (int i = M - 1; i >= 0; i--) {
+            bytes[i] = (byte) i;
+        }
+        return bytes;
+    }
+
+    // Feeds length bytes of b, beginning at start, to crc one at a time.
+    private static void updateSerial(Checksum crc, byte[] b, int start, int length) {
+        for (int pos = start, left = length; left > 0; pos++, left--) {
+            crc.update(b[pos]);
+        }
+    }
+
+    // Same loop as updateSerial, plus one getValue() call after the loop (result unused).
+    private static void updateSerialSlow(Checksum crc, byte[] b, int start, int length) {
+        for (int pos = start, left = length; left > 0; pos++, left--) {
+            crc.update(b[pos]);
+        }
+        crc.getValue();
+    }
+
+    // Runs the bytes through CRC32.update(ByteBuffer) using a direct buffer.
+    private static void updateDirect(CRC32 crc3, byte[] b, int start, int length) {
+        ByteBuffer direct = ByteBuffer.allocateDirect(length);
+        direct.put(b, start, length).flip();
+        crc3.update(direct);
+    }
+}