--- a/hotspot/src/cpu/aarch64/vm/aarch64.ad Mon Jul 27 13:56:26 2015 -0700
+++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad Fri Jul 31 12:13:57 2015 +0200
@@ -2167,8 +2167,12 @@
return 0; // Self copy, no move.
}
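+ // A 64-bit value occupies an even/odd pair of adjacent slots, so require an even lo
+ // index with hi == lo + 1 for both the source and the destination.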
+ bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
+ (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
+ int src_offset = ra_->reg2offset(src_lo);
+ int dst_offset = ra_->reg2offset(dst_lo);
+
if (bottom_type()->isa_vect() != NULL) {
- uint len = 4;
uint ireg = ideal_reg();
assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
if (cbuf) {
@@ -2176,334 +2180,115 @@
assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
// stack->stack
- int src_offset = ra_->reg2offset(src_lo);
- int dst_offset = ra_->reg2offset(dst_lo);
assert((src_offset & 7) && (dst_offset & 7), "unaligned stack offset");
- len = 8;
if (ireg == Op_VecD) {
- __ ldr(rscratch1, Address(sp, src_offset));
- __ str(rscratch1, Address(sp, dst_offset));
+ __ unspill(rscratch1, true, src_offset);
+ __ spill(rscratch1, true, dst_offset);
} else {
- if (src_offset < 512) {
- __ ldp(rscratch1, rscratch2, Address(sp, src_offset));
- } else {
- __ ldr(rscratch1, Address(sp, src_offset));
- __ ldr(rscratch2, Address(sp, src_offset+4));
- len += 4;
- }
- if (dst_offset < 512) {
- __ stp(rscratch1, rscratch2, Address(sp, dst_offset));
- } else {
- __ str(rscratch1, Address(sp, dst_offset));
- __ str(rscratch2, Address(sp, dst_offset+4));
- len += 4;
- }
+ __ spill_copy128(src_offset, dst_offset);
}
} else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
- __ orr(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+ __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
ireg == Op_VecD ? __ T8B : __ T16B,
- as_FloatRegister(Matcher::_regEncode[src_lo]),
as_FloatRegister(Matcher::_regEncode[src_lo]));
} else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
- __ str(as_FloatRegister(Matcher::_regEncode[src_lo]),
- ireg == Op_VecD ? __ D : __ Q,
- Address(sp, ra_->reg2offset(dst_lo)));
+ __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
+ ireg == Op_VecD ? __ D : __ Q,
+ ra_->reg2offset(dst_lo));
} else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
- __ ldr(as_FloatRegister(Matcher::_regEncode[dst_lo]),
- ireg == Op_VecD ? __ D : __ Q,
- Address(sp, ra_->reg2offset(src_lo)));
+ __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+ ireg == Op_VecD ? __ D : __ Q,
+ ra_->reg2offset(src_lo));
} else {
ShouldNotReachHere();
}
- } else if (st) {
- if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
- // stack->stack
- int src_offset = ra_->reg2offset(src_lo);
- int dst_offset = ra_->reg2offset(dst_lo);
- if (ireg == Op_VecD) {
- st->print("ldr rscratch1, [sp, #%d]", src_offset);
- st->print("str rscratch1, [sp, #%d]", dst_offset);
+ }
+ } else if (cbuf) {
+ MacroAssembler _masm(cbuf);
+ switch (src_lo_rc) {
+ case rc_int:
+ if (dst_lo_rc == rc_int) { // gpr --> gpr copy
+ if (is64) {
+ __ mov(as_Register(Matcher::_regEncode[dst_lo]),
+ as_Register(Matcher::_regEncode[src_lo]));
} else {
- if (src_offset < 512) {
- st->print("ldp rscratch1, rscratch2, [sp, #%d]", src_offset);
- } else {
- st->print("ldr rscratch1, [sp, #%d]", src_offset);
- st->print("\nldr rscratch2, [sp, #%d]", src_offset+4);
- }
- if (dst_offset < 512) {
- st->print("\nstp rscratch1, rscratch2, [sp, #%d]", dst_offset);
- } else {
- st->print("\nstr rscratch1, [sp, #%d]", dst_offset);
- st->print("\nstr rscratch2, [sp, #%d]", dst_offset+4);
- }
- }
- st->print("\t# vector spill, stack to stack");
- } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
- st->print("mov %s, %s\t# vector spill, reg to reg",
- Matcher::regName[dst_lo], Matcher::regName[src_lo]);
- } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
- st->print("str %s, [sp, #%d]\t# vector spill, reg to stack",
- Matcher::regName[src_lo], ra_->reg2offset(dst_lo));
- } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
- st->print("ldr %s, [sp, #%d]\t# vector spill, stack to reg",
- Matcher::regName[dst_lo], ra_->reg2offset(src_lo));
- }
- }
- return len;
- }
-
- switch (src_lo_rc) {
- case rc_int:
- if (dst_lo_rc == rc_int) { // gpr --> gpr copy
- if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
- (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
- // 64 bit
- if (cbuf) {
- MacroAssembler _masm(cbuf);
- __ mov(as_Register(Matcher::_regEncode[dst_lo]),
- as_Register(Matcher::_regEncode[src_lo]));
- } else if (st) {
- st->print("mov %s, %s\t# shuffle",
- Matcher::regName[dst_lo],
- Matcher::regName[src_lo]);
- }
- } else {
- // 32 bit
- if (cbuf) {
- MacroAssembler _masm(cbuf);
- __ movw(as_Register(Matcher::_regEncode[dst_lo]),
- as_Register(Matcher::_regEncode[src_lo]));
- } else if (st) {
- st->print("movw %s, %s\t# shuffle",
- Matcher::regName[dst_lo],
- Matcher::regName[src_lo]);
+ MacroAssembler _masm(cbuf);
+ __ movw(as_Register(Matcher::_regEncode[dst_lo]),
+ as_Register(Matcher::_regEncode[src_lo]));
}
- }
- } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
- if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
- (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
- // 64 bit
- if (cbuf) {
- MacroAssembler _masm(cbuf);
- __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
- as_Register(Matcher::_regEncode[src_lo]));
- } else if (st) {
- st->print("fmovd %s, %s\t# shuffle",
- Matcher::regName[dst_lo],
- Matcher::regName[src_lo]);
- }
- } else {
- // 32 bit
- if (cbuf) {
- MacroAssembler _masm(cbuf);
- __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
- as_Register(Matcher::_regEncode[src_lo]));
- } else if (st) {
- st->print("fmovs %s, %s\t# shuffle",
- Matcher::regName[dst_lo],
- Matcher::regName[src_lo]);
- }
- }
- } else { // gpr --> stack spill
- assert(dst_lo_rc == rc_stack, "spill to bad register class");
- int dst_offset = ra_->reg2offset(dst_lo);
- if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
- (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
- // 64 bit
- if (cbuf) {
- MacroAssembler _masm(cbuf);
- __ str(as_Register(Matcher::_regEncode[src_lo]),
- Address(sp, dst_offset));
- } else if (st) {
- st->print("str %s, [sp, #%d]\t# spill",
- Matcher::regName[src_lo],
- dst_offset);
- }
- } else {
- // 32 bit
- if (cbuf) {
- MacroAssembler _masm(cbuf);
- __ strw(as_Register(Matcher::_regEncode[src_lo]),
- Address(sp, dst_offset));
- } else if (st) {
- st->print("strw %s, [sp, #%d]\t# spill",
- Matcher::regName[src_lo],
- dst_offset);
- }
- }
- }
- return 4;
- case rc_float:
- if (dst_lo_rc == rc_int) { // fpr --> gpr copy
- if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
- (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
- // 64 bit
- if (cbuf) {
- MacroAssembler _masm(cbuf);
- __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
- as_FloatRegister(Matcher::_regEncode[src_lo]));
- } else if (st) {
- st->print("fmovd %s, %s\t# shuffle",
- Matcher::regName[dst_lo],
- Matcher::regName[src_lo]);
+ } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
+ if (is64) {
+ __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+ as_Register(Matcher::_regEncode[src_lo]));
+ } else {
+ __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+ as_Register(Matcher::_regEncode[src_lo]));
}
- } else {
- // 32 bit
- if (cbuf) {
- MacroAssembler _masm(cbuf);
- __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
- as_FloatRegister(Matcher::_regEncode[src_lo]));
- } else if (st) {
- st->print("fmovs %s, %s\t# shuffle",
- Matcher::regName[dst_lo],
- Matcher::regName[src_lo]);
- }
- }
- } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
- if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
- (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
- // 64 bit
- if (cbuf) {
- MacroAssembler _masm(cbuf);
- __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
- as_FloatRegister(Matcher::_regEncode[src_lo]));
- } else if (st) {
- st->print("fmovd %s, %s\t# shuffle",
- Matcher::regName[dst_lo],
- Matcher::regName[src_lo]);
- }
- } else {
- // 32 bit
- if (cbuf) {
- MacroAssembler _masm(cbuf);
- __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
- as_FloatRegister(Matcher::_regEncode[src_lo]));
- } else if (st) {
- st->print("fmovs %s, %s\t# shuffle",
- Matcher::regName[dst_lo],
- Matcher::regName[src_lo]);
- }
- }
- } else { // fpr --> stack spill
- assert(dst_lo_rc == rc_stack, "spill to bad register class");
- int dst_offset = ra_->reg2offset(dst_lo);
- if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
- (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
- // 64 bit
- if (cbuf) {
- MacroAssembler _masm(cbuf);
- __ strd(as_FloatRegister(Matcher::_regEncode[src_lo]),
- Address(sp, dst_offset));
- } else if (st) {
- st->print("strd %s, [sp, #%d]\t# spill",
- Matcher::regName[src_lo],
- dst_offset);
- }
- } else {
- // 32 bit
- if (cbuf) {
- MacroAssembler _masm(cbuf);
- __ strs(as_FloatRegister(Matcher::_regEncode[src_lo]),
- Address(sp, dst_offset));
- } else if (st) {
- st->print("strs %s, [sp, #%d]\t# spill",
- Matcher::regName[src_lo],
- dst_offset);
- }
+ } else { // gpr --> stack spill
+ assert(dst_lo_rc == rc_stack, "spill to bad register class");
+ __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
}
- }
- return 4;
- case rc_stack:
- int src_offset = ra_->reg2offset(src_lo);
- if (dst_lo_rc == rc_int) { // stack --> gpr load
- if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
- (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
- // 64 bit
- if (cbuf) {
- MacroAssembler _masm(cbuf);
- __ ldr(as_Register(Matcher::_regEncode[dst_lo]),
- Address(sp, src_offset));
- } else if (st) {
- st->print("ldr %s, [sp, %d]\t# restore",
- Matcher::regName[dst_lo],
- src_offset);
+ break;
+ case rc_float:
+ if (dst_lo_rc == rc_int) { // fpr --> gpr copy
+ if (is64) {
+ __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
+ as_FloatRegister(Matcher::_regEncode[src_lo]));
+ } else {
+ __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
+ as_FloatRegister(Matcher::_regEncode[src_lo]));
}
- } else {
- // 32 bit
- if (cbuf) {
- MacroAssembler _masm(cbuf);
- __ ldrw(as_Register(Matcher::_regEncode[dst_lo]),
- Address(sp, src_offset));
- } else if (st) {
- st->print("ldr %s, [sp, %d]\t# restore",
- Matcher::regName[dst_lo],
- src_offset);
- }
- }
- return 4;
- } else if (dst_lo_rc == rc_float) { // stack --> fpr load
- if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
- (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
- // 64 bit
- if (cbuf) {
- MacroAssembler _masm(cbuf);
- __ ldrd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
- Address(sp, src_offset));
- } else if (st) {
- st->print("ldrd %s, [sp, %d]\t# restore",
- Matcher::regName[dst_lo],
- src_offset);
+ } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
+ if (cbuf) {
+ __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+ as_FloatRegister(Matcher::_regEncode[src_lo]));
+ } else {
+ __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+ as_FloatRegister(Matcher::_regEncode[src_lo]));
}
- } else {
- // 32 bit
- if (cbuf) {
- MacroAssembler _masm(cbuf);
- __ ldrs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
- Address(sp, src_offset));
- } else if (st) {
- st->print("ldrs %s, [sp, %d]\t# restore",
- Matcher::regName[dst_lo],
- src_offset);
- }
+ } else { // fpr --> stack spill
+ assert(dst_lo_rc == rc_stack, "spill to bad register class");
+ __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
+ is64 ? __ D : __ S, dst_offset);
}
- return 4;
- } else { // stack --> stack copy
- assert(dst_lo_rc == rc_stack, "spill to bad register class");
- int dst_offset = ra_->reg2offset(dst_lo);
- if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
- (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
- // 64 bit
- if (cbuf) {
- MacroAssembler _masm(cbuf);
- __ ldr(rscratch1, Address(sp, src_offset));
- __ str(rscratch1, Address(sp, dst_offset));
- } else if (st) {
- st->print("ldr rscratch1, [sp, %d]\t# mem-mem spill",
- src_offset);
- st->print("\n\t");
- st->print("str rscratch1, [sp, %d]",
- dst_offset);
- }
- } else {
- // 32 bit
- if (cbuf) {
- MacroAssembler _masm(cbuf);
- __ ldrw(rscratch1, Address(sp, src_offset));
- __ strw(rscratch1, Address(sp, dst_offset));
- } else if (st) {
- st->print("ldrw rscratch1, [sp, %d]\t# mem-mem spill",
- src_offset);
- st->print("\n\t");
- st->print("strw rscratch1, [sp, %d]",
- dst_offset);
- }
+ break;
+ case rc_stack:
+ if (dst_lo_rc == rc_int) { // stack --> gpr load
+ __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
+ } else if (dst_lo_rc == rc_float) { // stack --> fpr load
+ __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+ is64 ? __ D : __ S, src_offset);
+ } else { // stack --> stack copy
+ assert(dst_lo_rc == rc_stack, "spill to bad register class");
+ __ unspill(rscratch1, is64, src_offset);
+ __ spill(rscratch1, is64, dst_offset);
}
- return 8;
+ break;
+ default:
+ assert(false, "bad rc_class for spill");
+ ShouldNotReachHere();
}
}
- assert(false," bad rc_class for spill ");
- Unimplemented();
+ if (st) {
+ st->print("spill ");
+ if (src_lo_rc == rc_stack) {
+ st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
+ } else {
+ st->print("%s -> ", Matcher::regName[src_lo]);
+ }
+ if (dst_lo_rc == rc_stack) {
+ st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
+ } else {
+ st->print("%s", Matcher::regName[dst_lo]);
+ }
+ if (bottom_type()->isa_vect() != NULL) {
+ st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
+ } else {
+ st->print("\t# spill size = %d", is64 ? 64:32);
+ }
+ }
+
return 0;
}
@@ -2522,7 +2307,7 @@
}
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
- return implementation(NULL, ra_, true, NULL);
+ return MachNode::size(ra_);
}
//=============================================================================
--- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp Mon Jul 27 13:56:26 2015 -0700
+++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp Fri Jul 31 12:13:57 2015 +0200
@@ -1896,7 +1896,7 @@
public:
enum SIMD_Arrangement {
- T8B, T16B, T4H, T8H, T2S, T4S, T1D, T2D
+ T8B, T16B, T4H, T8H, T2S, T4S, T1D, T2D, T1Q
};
enum SIMD_RegVariant {
@@ -2225,14 +2225,16 @@
f(0b001111, 15, 10), rf(Vn, 5), rf(Xd, 0);
}
- // We do not handle the 1Q arrangement.
void pmull(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) {
starti;
- assert(Ta == T8H && (Tb == T8B || Tb == T16B), "Invalid Size specifier");
- f(0, 31), f(Tb & 1, 30), f(0b001110001, 29, 21), rf(Vm, 16), f(0b111000, 15, 10);
- rf(Vn, 5), rf(Vd, 0);
+ assert((Ta == T1Q && (Tb == T1D || Tb == T2D)) ||
+ (Ta == T8H && (Tb == T8B || Tb == T16B)), "Invalid Size specifier");
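+ // size == 0b11 selects the 64x64->128 bit polynomial multiply (1Q result);
+ // 0b00 selects the 8x8->16 bit form (8H result).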
+ int size = (Ta == T1Q) ? 0b11 : 0b00;
+ f(0, 31), f(Tb & 1, 30), f(0b001110, 29, 24), f(size, 23, 22);
+ f(1, 21), rf(Vm, 16), f(0b111000, 15, 10), rf(Vn, 5), rf(Vd, 0);
}
void pmull2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) {
+ assert(Tb == T2D || Tb == T16B, "pmull2 assumes T2D or T16B as the second size specifier");
pmull(Vd, Ta, Vn, Vm, Tb);
}
@@ -2245,15 +2247,6 @@
f(0b100001010010, 21, 10), rf(Vn, 5), rf(Vd, 0);
}
- void rev32(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn)
- {
- starti;
- assert(T <= T8H, "must be one of T8B, T16B, T4H, T8H");
- f(0, 31), f((int)T & 1, 30), f(0b101110, 29, 24);
- f(T <= T16B ? 0b00 : 0b01, 23, 22), f(0b100000000010, 21, 10);
- rf(Vn, 5), rf(Vd, 0);
- }
-
void dup(FloatRegister Vd, SIMD_Arrangement T, Register Xs)
{
starti;
@@ -2290,6 +2283,57 @@
#undef INSN
+ // Table vector lookup
+#define INSN(NAME, op) \
+ void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, unsigned registers, FloatRegister Vm) { \
+ starti; \
+ assert(T == T8B || T == T16B, "invalid arrangement"); \
+ assert(0 < registers && registers <= 4, "invalid number of registers"); \
+ f(0, 31), f((int)T & 1, 30), f(0b001110000, 29, 21), rf(Vm, 16), f(0, 15); \
+ f(registers - 1, 14, 13), f(op, 12),f(0b00, 11, 10), rf(Vn, 5), rf(Vd, 0); \
+ }
+
+ INSN(tbl, 0);
+ INSN(tbx, 1);
+
+#undef INSN
+
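+ // Vector reverse within elements (REV64/REV32/REV16), selected by the U bit and
+ // opcode field; the permitted arrangements differ per instruction, hence the
+ // per-instruction ASSERTION below.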
+#define INSN(NAME, U, opcode) \
+ void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) { \
+ starti; \
+ assert((ASSERTION), MSG); \
+ f(0, 31), f((int)T & 1, 30), f(U, 29), f(0b01110, 28, 24); \
+ f((int)(T >> 1), 23, 22), f(0b10000, 21, 17), f(opcode, 16, 12); \
+ f(0b10, 11, 10), rf(Vn, 5), rf(Vd, 0); \
+ }
+
+#define MSG "invalid arrangement"
+
+#define ASSERTION (T == T8B || T == T16B || T == T4H || T == T8H || T == T2S || T == T4S)
+ INSN(rev64, 0, 0b00000);
+#undef ASSERTION
+
+#define ASSERTION (T == T8B || T == T16B || T == T4H || T == T8H)
+ INSN(rev32, 1, 0b00000);
+#undef ASSERTION
+
+#define ASSERTION (T == T8B || T == T16B)
+ INSN(rev16, 0, 0b00001);
+#undef ASSERTION
+
+#undef MSG
+
+#undef INSN
+
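+ // EXT: extract a vector from a pair of source registers, starting at byte
+ // position 'index'.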
+ void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, int index)
+ {
+ starti;
+ assert(T == T8B || T == T16B, "invalid arrangement");
+ assert((T == T8B && index <= 0b0111) || (T == T16B && index <= 0b1111), "Invalid index value");
+ f(0, 31), f((int)T & 1, 30), f(0b101110000, 29, 21);
+ rf(Vm, 16), f(0, 15), f(index, 14, 11);
+ f(0, 10), rf(Vn, 5), rf(Vd, 0);
+ }
/* Simulator extensions to the ISA
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Mon Jul 27 13:56:26 2015 -0700
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Fri Jul 31 12:13:57 2015 +0200
@@ -2009,6 +2009,14 @@
}
}
+void MacroAssembler::sub(Register Rd, Register Rn, RegisterOrConstant decrement) {
+ if (decrement.is_register()) {
+ sub(Rd, Rn, decrement.as_register());
+ } else {
+ sub(Rd, Rn, decrement.as_constant());
+ }
+}
+
void MacroAssembler::reinit_heapbase()
{
if (UseCompressedOops) {
@@ -2307,6 +2315,28 @@
}
}
+Address MacroAssembler::spill_address(int size, int offset, Register tmp)
+{
+ assert(offset >= 0, "spill to negative address?");
+ // Offset reachable ?
+ // Not aligned - 9 bits signed offset
+ // Aligned - 12 bits unsigned offset shifted
+ Register base = sp;
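+ // If the offset cannot be encoded directly in the load/store, fold part of it
+ // into tmp and address relative to tmp instead of sp.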
+ if ((offset & (size-1)) && offset >= (1<<8)) {
+ add(tmp, base, offset & ((1<<12)-1));
+ base = tmp;
+ offset &= -1<<12;
+ }
+
+ if (offset >= (1<<12) * size) {
+ add(tmp, base, offset & (((1<<12)-1)<<12));
+ base = tmp;
+ offset &= ~(((1<<12)-1)<<12);
+ }
+
+ return Address(base, offset);
+}
+
/**
* Multiply 64 bit by 64 bit first loop.
*/
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Mon Jul 27 13:56:26 2015 -0700
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Fri Jul 31 12:13:57 2015 +0200
@@ -464,10 +464,21 @@
mov(dst, (long)i);
}
+ void mov(Register dst, RegisterOrConstant src) {
+ if (src.is_register())
+ mov(dst, src.as_register());
+ else
+ mov(dst, src.as_constant());
+ }
+
void movptr(Register r, uintptr_t imm64);
void mov(FloatRegister Vd, SIMD_Arrangement T, u_int32_t imm32);
+ void mov(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {
+ orr(Vd, T, Vn, Vn);
+ }
+
// macro instructions for accessing and updating floating point
// status register
//
@@ -1045,6 +1056,7 @@
void add(Register Rd, Register Rn, RegisterOrConstant increment);
void addw(Register Rd, Register Rn, RegisterOrConstant increment);
+ void sub(Register Rd, Register Rn, RegisterOrConstant decrement);
void adrp(Register reg1, const Address &dest, unsigned long &byte_offset);
@@ -1161,6 +1173,46 @@
// Uses rscratch2.
Address offsetted_address(Register r, Register r1, Address::extend ext,
int offset, int size);
+
+private:
+ // Returns an address on the stack which is reachable with a ldr/str of the given size
+ // Uses rscratch2 if the address is not directly reachable
+ Address spill_address(int size, int offset, Register tmp=rscratch2);
+
+public:
+ void spill(Register Rx, bool is64, int offset) {
+ if (is64) {
+ str(Rx, spill_address(8, offset));
+ } else {
+ strw(Rx, spill_address(4, offset));
+ }
+ }
+ void spill(FloatRegister Vx, SIMD_RegVariant T, int offset) {
+ str(Vx, T, spill_address(1 << (int)T, offset));
+ }
+ void unspill(Register Rx, bool is64, int offset) {
+ if (is64) {
+ ldr(Rx, spill_address(8, offset));
+ } else {
+ ldrw(Rx, spill_address(4, offset));
+ }
+ }
+ void unspill(FloatRegister Vx, SIMD_RegVariant T, int offset) {
+ ldr(Vx, T, spill_address(1 << (int)T, offset));
+ }
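+ // Copies a 128-bit stack slot. Uses a single ldp/stp pair when both offsets are
+ // 8-byte aligned and within the pair-immediate range; otherwise falls back to
+ // two 64-bit unspill/spill round trips.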
+ void spill_copy128(int src_offset, int dst_offset,
+ Register tmp1=rscratch1, Register tmp2=rscratch2) {
+ if (src_offset < 512 && (src_offset & 7) == 0 &&
+ dst_offset < 512 && (dst_offset & 7) == 0) {
+ ldp(tmp1, tmp2, Address(sp, src_offset));
+ stp(tmp1, tmp2, Address(sp, dst_offset));
+ } else {
+ unspill(tmp1, true, src_offset);
+ spill(tmp1, true, dst_offset);
+ unspill(tmp1, true, src_offset+8);
+ spill(tmp1, true, dst_offset+8);
+ }
+ }
};
#ifdef ASSERT
--- a/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp Mon Jul 27 13:56:26 2015 -0700
+++ b/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp Fri Jul 31 12:13:57 2015 +0200
@@ -120,10 +120,8 @@
// we save r19-r28 which Java uses as scratch registers and C
// expects to be callee-save
//
- // we don't save any FP registers since only v8-v15 are callee-save
- // (strictly only the f and d components) and Java uses them as
- // callee-save. v0-v7 are arg registers and C treats v16-v31 as
- // volatile (as does Java?)
+ // we save the bottom 64 bits of each value stored in v8-v15; it is
+ // the responsibility of the caller to preserve larger values.
//
// so the stub frame looks like this when we enter Java code
//
@@ -131,14 +129,14 @@
// [ argument word n ]
// ...
// -27 [ argument word 1 ]
- // -26 [ saved d15 ] <--- sp_after_call
- // -25 [ saved d14 ]
- // -24 [ saved d13 ]
- // -23 [ saved d12 ]
- // -22 [ saved d11 ]
- // -21 [ saved d10 ]
- // -20 [ saved d9 ]
- // -19 [ saved d8 ]
+ // -26 [ saved v15 ] <--- sp_after_call
+ // -25 [ saved v14 ]
+ // -24 [ saved v13 ]
+ // -23 [ saved v12 ]
+ // -22 [ saved v11 ]
+ // -21 [ saved v10 ]
+ // -20 [ saved v9 ]
+ // -19 [ saved v8 ]
// -18 [ saved r28 ]
// -17 [ saved r27 ]
// -16 [ saved r26 ]
@@ -2437,6 +2435,137 @@
return start;
}
+ /**
+ * Arguments:
+ *
+ * Input:
+ * c_rarg0 - current state address
+ * c_rarg1 - H key address
+ * c_rarg2 - data address
+ * c_rarg3 - number of blocks
+ *
+ * Output:
+ * Updated state at c_rarg0
+ */
+ address generate_ghash_processBlocks() {
+ __ align(CodeEntryAlignment);
+ Label L_ghash_loop, L_exit;
+
+ StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
+ address start = __ pc();
+
+ Register state = c_rarg0;
+ Register subkeyH = c_rarg1;
+ Register data = c_rarg2;
+ Register blocks = c_rarg3;
+
+ FloatRegister vzr = v30;
+ __ eor(vzr, __ T16B, vzr, vzr); // zero register
+
+ __ mov(v26, __ T16B, 1);
+ __ mov(v27, __ T16B, 63);
+ __ mov(v28, __ T16B, 62);
+ __ mov(v29, __ T16B, 57);
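+ // v26, v27, v28 and v29 hold per-lane shift counts (1, 63, 62, 57) used for the
+ // one-bit left shift of the product and for the reduction by the GHASH
+ // polynomial x^128 + x^7 + x^2 + x + 1.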
+
+ __ ldrq(v6, Address(state));
+ __ ldrq(v16, Address(subkeyH));
+
+ __ ext(v0, __ T16B, v6, v6, 0x08);
+ __ ext(v1, __ T16B, v16, v16, 0x08);
+ __ eor(v16, __ T16B, v16, v1);
+
+ __ bind(L_ghash_loop);
+
+ __ ldrq(v2, Address(__ post(data, 0x10)));
+ __ rev64(v2, __ T16B, v2); // swap data
+
+ __ ext(v6, __ T16B, v0, v0, 0x08);
+ __ eor(v6, __ T16B, v6, v2);
+ __ ext(v2, __ T16B, v6, v6, 0x08);
+
+ __ pmull2(v7, __ T1Q, v2, v1, __ T2D); // A1*B1
+ __ eor(v6, __ T16B, v6, v2);
+ __ pmull(v5, __ T1Q, v2, v1, __ T1D); // A0*B0
+ __ pmull(v20, __ T1Q, v6, v16, __ T1D); // (A1 + A0)(B1 + B0)
+
+ __ ext(v21, __ T16B, v5, v7, 0x08);
+ __ eor(v18, __ T16B, v7, v5); // A1*B1 xor A0*B0
+ __ eor(v20, __ T16B, v20, v21);
+ __ eor(v20, __ T16B, v20, v18);
+
+ // Registers pair <v7:v5> holds the result of carry-less multiplication
+ __ ins(v7, __ D, v20, 0, 1);
+ __ ins(v5, __ D, v20, 1, 0);
+
+ // Result of the multiplication is shifted by one bit position
+ // [X3:X2:X1:X0] = [X3:X2:X1:X0] << 1
+ __ ushr(v18, __ T2D, v5, -63 & 63);
+ __ ins(v25, __ D, v18, 1, 0);
+ __ ins(v25, __ D, vzr, 0, 0);
+ __ ushl(v5, __ T2D, v5, v26);
+ __ orr(v5, __ T16B, v5, v25);
+
+ __ ushr(v19, __ T2D, v7, -63 & 63);
+ __ ins(v19, __ D, v19, 1, 0);
+ __ ins(v19, __ D, v18, 0, 1);
+ __ ushl(v7, __ T2D, v7, v26);
+ __ orr(v6, __ T16B, v7, v19);
+
+ __ ins(v24, __ D, v5, 0, 1);
+
+ // A = X0 << 63
+ __ ushl(v21, __ T2D, v5, v27);
+
+ // A = X0 << 62
+ __ ushl(v22, __ T2D, v5, v28);
+
+ // A = X0 << 57
+ __ ushl(v23, __ T2D, v5, v29);
+
+ // D = X1^A^B^C
+ __ eor(v21, __ T16B, v21, v22);
+ __ eor(v21, __ T16B, v21, v23);
+ __ eor(v21, __ T16B, v21, v24);
+ __ ins(v5, __ D, v21, 1, 0);
+
+ // [E1:E0] = [D:X0] >> 1
+ __ ushr(v20, __ T2D, v5, -1 & 63);
+ __ ushl(v18, __ T2D, v5, v27);
+ __ ext(v25, __ T16B, v18, vzr, 0x08);
+ __ orr(v19, __ T16B, v20, v25);
+
+ __ eor(v7, __ T16B, v5, v19);
+
+ // [F1:F0] = [D:X0] >> 2
+ __ ushr(v20, __ T2D, v5, -2 & 63);
+ __ ushl(v18, __ T2D, v5, v28);
+ __ ins(v25, __ D, v18, 0, 1);
+ __ orr(v19, __ T16B, v20, v25);
+
+ __ eor(v7, __ T16B, v7, v19);
+
+ // [G1:G0] = [D:X0] >> 7
+ __ ushr(v20, __ T2D, v5, -7 & 63);
+ __ ushl(v18, __ T2D, v5, v29);
+ __ ins(v25, __ D, v18, 0, 1);
+ __ orr(v19, __ T16B, v20, v25);
+
+ // [H1:H0] = [D^E1^F1^G1:X0^E0^F0^G0]
+ __ eor(v7, __ T16B, v7, v19);
+
+ // Result = [H1:H0]^[X3:X2]
+ __ eor(v0, __ T16B, v7, v6);
+
+ __ subs(blocks, blocks, 1);
+ __ cbnz(blocks, L_ghash_loop);
+
+ __ ext(v1, __ T16B, v0, v0, 0x08);
+ __ st1(v1, __ T16B, state);
+ __ ret(lr);
+
+ return start;
+ }
+
// Continuation point for throwing of implicit exceptions that are
// not handled in the current activation. Fabricates an exception
// oop and initiates normal exception dispatching in this
@@ -2544,6 +2673,828 @@
return stub->entry_point();
}
+ class MontgomeryMultiplyGenerator : public MacroAssembler {
+
+ Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Ra, Rb, Rm, Rn,
+ Pa, Pb, Pn, Pm, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, t0, t1, t2, Ri, Rj;
+
+ RegSet _toSave;
+ bool _squaring;
+
+ public:
+ MontgomeryMultiplyGenerator (Assembler *as, bool squaring)
+ : MacroAssembler(as->code()), _squaring(squaring) {
+
+ // Register allocation
+
+ Register reg = c_rarg0;
+ Pa_base = reg; // Argument registers
+ if (squaring)
+ Pb_base = Pa_base;
+ else
+ Pb_base = ++reg;
+ Pn_base = ++reg;
+ Rlen= ++reg;
+ inv = ++reg;
+ Pm_base = ++reg;
+
+ // Working registers:
+ Ra = ++reg; // The current digit of a, b, n, and m.
+ Rb = ++reg;
+ Rm = ++reg;
+ Rn = ++reg;
+
+ Pa = ++reg; // Pointers to the current/next digit of a, b, n, and m.
+ Pb = ++reg;
+ Pm = ++reg;
+ Pn = ++reg;
+
+ t0 = ++reg; // Three registers which form a
+ t1 = ++reg; // triple-precision accumulator.
+ t2 = ++reg;
+
+ Ri = ++reg; // Inner and outer loop indexes.
+ Rj = ++reg;
+
+ Rhi_ab = ++reg; // Product registers: low and high parts
+ Rlo_ab = ++reg; // of a*b and m*n.
+ Rhi_mn = ++reg;
+ Rlo_mn = ++reg;
+
+ // r19 and up are callee-saved.
+ _toSave = RegSet::range(r19, reg) + Pm_base;
+ }
+
+ private:
+ void save_regs() {
+ push(_toSave, sp);
+ }
+
+ void restore_regs() {
+ pop(_toSave, sp);
+ }
+
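+ // Emits the loop body twice per iteration; an odd trip count enters at the
+ // second copy via the 'odd' label.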
+ template <typename T>
+ void unroll_2(Register count, T block) {
+ Label loop, end, odd;
+ tbnz(count, 0, odd);
+ cbz(count, end);
+ align(16);
+ bind(loop);
+ (this->*block)();
+ bind(odd);
+ (this->*block)();
+ subs(count, count, 2);
+ br(Assembler::GT, loop);
+ bind(end);
+ }
+
+ template <typename T>
+ void unroll_2(Register count, T block, Register d, Register s, Register tmp) {
+ Label loop, end, odd;
+ tbnz(count, 0, odd);
+ cbz(count, end);
+ align(16);
+ bind(loop);
+ (this->*block)(d, s, tmp);
+ bind(odd);
+ (this->*block)(d, s, tmp);
+ subs(count, count, 2);
+ br(Assembler::GT, loop);
+ bind(end);
+ }
+
+ void pre1(RegisterOrConstant i) {
+ block_comment("pre1");
+ // Pa = Pa_base;
+ // Pb = Pb_base + i;
+ // Pm = Pm_base;
+ // Pn = Pn_base + i;
+ // Ra = *Pa;
+ // Rb = *Pb;
+ // Rm = *Pm;
+ // Rn = *Pn;
+ ldr(Ra, Address(Pa_base));
+ ldr(Rb, Address(Pb_base, i, Address::uxtw(LogBytesPerWord)));
+ ldr(Rm, Address(Pm_base));
+ ldr(Rn, Address(Pn_base, i, Address::uxtw(LogBytesPerWord)));
+ lea(Pa, Address(Pa_base));
+ lea(Pb, Address(Pb_base, i, Address::uxtw(LogBytesPerWord)));
+ lea(Pm, Address(Pm_base));
+ lea(Pn, Address(Pn_base, i, Address::uxtw(LogBytesPerWord)));
+
+ // Zero the m*n result.
+ mov(Rhi_mn, zr);
+ mov(Rlo_mn, zr);
+ }
+
+ // The core multiply-accumulate step of a Montgomery
+ // multiplication. The idea is to schedule operations as a
+ // pipeline so that instructions with long latencies (loads and
+ // multiplies) have time to complete before their results are
+ // used. This benefits in-order implementations of the
+ // architecture the most, but out-of-order ones also benefit.
+ void step() {
+ block_comment("step");
+ // MACC(Ra, Rb, t0, t1, t2);
+ // Ra = *++Pa;
+ // Rb = *--Pb;
+ umulh(Rhi_ab, Ra, Rb);
+ mul(Rlo_ab, Ra, Rb);
+ ldr(Ra, pre(Pa, wordSize));
+ ldr(Rb, pre(Pb, -wordSize));
+ acc(Rhi_mn, Rlo_mn, t0, t1, t2); // The pending m*n from the
+ // previous iteration.
+ // MACC(Rm, Rn, t0, t1, t2);
+ // Rm = *++Pm;
+ // Rn = *--Pn;
+ umulh(Rhi_mn, Rm, Rn);
+ mul(Rlo_mn, Rm, Rn);
+ ldr(Rm, pre(Pm, wordSize));
+ ldr(Rn, pre(Pn, -wordSize));
+ acc(Rhi_ab, Rlo_ab, t0, t1, t2);
+ }
+
+ void post1() {
+ block_comment("post1");
+
+ // MACC(Ra, Rb, t0, t1, t2);
+ // Ra = *++Pa;
+ // Rb = *--Pb;
+ umulh(Rhi_ab, Ra, Rb);
+ mul(Rlo_ab, Ra, Rb);
+ acc(Rhi_mn, Rlo_mn, t0, t1, t2); // The pending m*n
+ acc(Rhi_ab, Rlo_ab, t0, t1, t2);
+
+ // *Pm = Rm = t0 * inv;
+ mul(Rm, t0, inv);
+ str(Rm, Address(Pm));
+
+ // MACC(Rm, Rn, t0, t1, t2);
+ // t0 = t1; t1 = t2; t2 = 0;
+ umulh(Rhi_mn, Rm, Rn);
+
+#ifndef PRODUCT
+ // assert(m[i] * n[0] + t0 == 0, "broken Montgomery multiply");
+ {
+ mul(Rlo_mn, Rm, Rn);
+ add(Rlo_mn, t0, Rlo_mn);
+ Label ok;
+ cbz(Rlo_mn, ok); {
+ stop("broken Montgomery multiply");
+ } bind(ok);
+ }
+#endif
+ // We have very carefully set things up so that
+ // m[i]*n[0] + t0 == 0 (mod b), so we don't have to calculate
+ // the lower half of Rm * Rn because we know the result already:
+ // it must be -t0. t0 + (-t0) must generate a carry iff
+ // t0 != 0. So, rather than do a mul and an adds we just set
+ // the carry flag iff t0 is nonzero.
+ //
+ // mul(Rlo_mn, Rm, Rn);
+ // adds(zr, t0, Rlo_mn);
+ subs(zr, t0, 1); // Set carry iff t0 is nonzero
+ adcs(t0, t1, Rhi_mn);
+ adc(t1, t2, zr);
+ mov(t2, zr);
+ }
+
+ void pre2(RegisterOrConstant i, RegisterOrConstant len) {
+ block_comment("pre2");
+ // Pa = Pa_base + i-len;
+ // Pb = Pb_base + len;
+ // Pm = Pm_base + i-len;
+ // Pn = Pn_base + len;
+
+ if (i.is_register()) {
+ sub(Rj, i.as_register(), len);
+ } else {
+ mov(Rj, i.as_constant());
+ sub(Rj, Rj, len);
+ }
+ // Rj == i-len
+
+ lea(Pa, Address(Pa_base, Rj, Address::uxtw(LogBytesPerWord)));
+ lea(Pb, Address(Pb_base, len, Address::uxtw(LogBytesPerWord)));
+ lea(Pm, Address(Pm_base, Rj, Address::uxtw(LogBytesPerWord)));
+ lea(Pn, Address(Pn_base, len, Address::uxtw(LogBytesPerWord)));
+
+ // Ra = *++Pa;
+ // Rb = *--Pb;
+ // Rm = *++Pm;
+ // Rn = *--Pn;
+ ldr(Ra, pre(Pa, wordSize));
+ ldr(Rb, pre(Pb, -wordSize));
+ ldr(Rm, pre(Pm, wordSize));
+ ldr(Rn, pre(Pn, -wordSize));
+
+ mov(Rhi_mn, zr);
+ mov(Rlo_mn, zr);
+ }
+
+ void post2(RegisterOrConstant i, RegisterOrConstant len) {
+ block_comment("post2");
+ if (i.is_constant()) {
+ mov(Rj, i.as_constant()-len.as_constant());
+ } else {
+ sub(Rj, i.as_register(), len);
+ }
+
+ adds(t0, t0, Rlo_mn); // The pending m*n, low part
+
+ // As soon as we know the least significant digit of our result,
+ // store it.
+ // Pm_base[i-len] = t0;
+ str(t0, Address(Pm_base, Rj, Address::uxtw(LogBytesPerWord)));
+
+ // t0 = t1; t1 = t2; t2 = 0;
+ adcs(t0, t1, Rhi_mn); // The pending m*n, high part
+ adc(t1, t2, zr);
+ mov(t2, zr);
+ }
+
+ // A carry in t0 after Montgomery multiplication means that we
+ // should subtract multiples of n from our result in m. We'll
+ // keep doing that until there is no carry.
+ void normalize(RegisterOrConstant len) {
+ block_comment("normalize");
+ // while (t0)
+ // t0 = sub(Pm_base, Pn_base, t0, len);
+ Label loop, post, again;
+ Register cnt = t1, i = t2; // Re-use registers; we're done with them now
+ cbz(t0, post); {
+ bind(again); {
+ mov(i, zr);
+ mov(cnt, len);
+ ldr(Rm, Address(Pm_base, i, Address::uxtw(LogBytesPerWord)));
+ ldr(Rn, Address(Pn_base, i, Address::uxtw(LogBytesPerWord)));
+ subs(zr, zr, zr); // set carry flag, i.e. no borrow
+ align(16);
+ bind(loop); {
+ sbcs(Rm, Rm, Rn);
+ str(Rm, Address(Pm_base, i, Address::uxtw(LogBytesPerWord)));
+ add(i, i, 1);
+ ldr(Rm, Address(Pm_base, i, Address::uxtw(LogBytesPerWord)));
+ ldr(Rn, Address(Pn_base, i, Address::uxtw(LogBytesPerWord)));
+ sub(cnt, cnt, 1);
+ } cbnz(cnt, loop);
+ sbc(t0, t0, zr);
+ } cbnz(t0, again);
+ } bind(post);
+ }
+
+ // Move memory at s to d, reversing words.
+ // Increments d to end of copied memory
+ // Destroys tmp1, tmp2
+ // Preserves len
+ // Leaves s pointing to the address which was in d at start
+ void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) {
+ assert(tmp1 < r19 && tmp2 < r19, "register corruption");
+
+ lea(s, Address(s, len, Address::uxtw(LogBytesPerWord)));
+ mov(tmp1, len);
+ unroll_2(tmp1, &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2);
+ sub(s, d, len, ext::uxtw, LogBytesPerWord);
+ }
+ // where
+ void reverse1(Register d, Register s, Register tmp) {
+ ldr(tmp, pre(s, -wordSize));
+ ror(tmp, tmp, 32);
+ str(tmp, post(d, wordSize));
+ }
+
+ void step_squaring() {
+ // An extra ACC
+ step();
+ acc(Rhi_ab, Rlo_ab, t0, t1, t2);
+ }
+
+ void last_squaring(RegisterOrConstant i) {
+ Label dont;
+ // if ((i & 1) == 0) {
+ tbnz(i.as_register(), 0, dont); {
+ // MACC(Ra, Rb, t0, t1, t2);
+ // Ra = *++Pa;
+ // Rb = *--Pb;
+ umulh(Rhi_ab, Ra, Rb);
+ mul(Rlo_ab, Ra, Rb);
+ acc(Rhi_ab, Rlo_ab, t0, t1, t2);
+ } bind(dont);
+ }
+
+ void extra_step_squaring() {
+ acc(Rhi_mn, Rlo_mn, t0, t1, t2); // The pending m*n
+
+ // MACC(Rm, Rn, t0, t1, t2);
+ // Rm = *++Pm;
+ // Rn = *--Pn;
+ umulh(Rhi_mn, Rm, Rn);
+ mul(Rlo_mn, Rm, Rn);
+ ldr(Rm, pre(Pm, wordSize));
+ ldr(Rn, pre(Pn, -wordSize));
+ }
+
+ void post1_squaring() {
+ acc(Rhi_mn, Rlo_mn, t0, t1, t2); // The pending m*n
+
+ // *Pm = Rm = t0 * inv;
+ mul(Rm, t0, inv);
+ str(Rm, Address(Pm));
+
+ // MACC(Rm, Rn, t0, t1, t2);
+ // t0 = t1; t1 = t2; t2 = 0;
+ umulh(Rhi_mn, Rm, Rn);
+
+#ifndef PRODUCT
+ // assert(m[i] * n[0] + t0 == 0, "broken Montgomery multiply");
+ {
+ mul(Rlo_mn, Rm, Rn);
+ add(Rlo_mn, t0, Rlo_mn);
+ Label ok;
+ cbz(Rlo_mn, ok); {
+ stop("broken Montgomery multiply");
+ } bind(ok);
+ }
+#endif
+ // We have very carefully set things up so that
+ // m[i]*n[0] + t0 == 0 (mod b), so we don't have to calculate
+ // the lower half of Rm * Rn because we know the result already:
+ // it must be -t0. t0 + (-t0) must generate a carry iff
+ // t0 != 0. So, rather than do a mul and an adds we just set
+ // the carry flag iff t0 is nonzero.
+ //
+ // mul(Rlo_mn, Rm, Rn);
+ // adds(zr, t0, Rlo_mn);
+ subs(zr, t0, 1); // Set carry iff t0 is nonzero
+ adcs(t0, t1, Rhi_mn);
+ adc(t1, t2, zr);
+ mov(t2, zr);
+ }
+
+ void acc(Register Rhi, Register Rlo,
+ Register t0, Register t1, Register t2) {
+ adds(t0, t0, Rlo);
+ adcs(t1, t1, Rhi);
+ adc(t2, t2, zr);
+ }
+
+ public:
+ /**
+ * Fast Montgomery multiplication. The derivation of the
+ * algorithm is in A Cryptographic Library for the Motorola
+ * DSP56000, Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237.
+ *
+ * Arguments:
+ *
+ * Inputs for multiplication:
+ * c_rarg0 - int array elements a
+ * c_rarg1 - int array elements b
+ * c_rarg2 - int array elements n (the modulus)
+ * c_rarg3 - int length
+ * c_rarg4 - int inv
+ * c_rarg5 - int array elements m (the result)
+ *
+ * Inputs for squaring:
+ * c_rarg0 - int array elements a
+ * c_rarg1 - int array elements n (the modulus)
+ * c_rarg2 - int length
+ * c_rarg3 - int inv
+ * c_rarg4 - int array elements m (the result)
+ *
+ */
+ address generate_multiply() {
+ Label argh, nothing;
+ bind(argh);
+ stop("MontgomeryMultiply total_allocation must be <= 8192");
+
+ align(CodeEntryAlignment);
+ address entry = pc();
+
+ cbzw(Rlen, nothing);
+
+ enter();
+
+ // Make room.
+ cmpw(Rlen, 512);
+ br(Assembler::HI, argh);
+ sub(Ra, sp, Rlen, ext::uxtw, exact_log2(4 * sizeof (jint)));
+ andr(sp, Ra, -2 * wordSize);
+
+ lsrw(Rlen, Rlen, 1); // length in longwords = len/2
+
+ {
+ // Copy input args, reversing as we go. We use Ra as a
+ // temporary variable.
+ reverse(Ra, Pa_base, Rlen, t0, t1);
+ if (!_squaring)
+ reverse(Ra, Pb_base, Rlen, t0, t1);
+ reverse(Ra, Pn_base, Rlen, t0, t1);
+ }
+
+ // Push all call-saved registers and also Pm_base which we'll need
+ // at the end.
+ save_regs();
+
+#ifndef PRODUCT
+ // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
+ {
+ ldr(Rn, Address(Pn_base, 0));
+ mul(Rlo_mn, Rn, inv);
+ cmp(Rlo_mn, -1);
+ Label ok;
+ br(EQ, ok); {
+ stop("broken inverse in Montgomery multiply");
+ } bind(ok);
+ }
+#endif
+
+ mov(Pm_base, Ra);
+
+ mov(t0, zr);
+ mov(t1, zr);
+ mov(t2, zr);
+
+ block_comment("for (int i = 0; i < len; i++) {");
+ mov(Ri, zr); {
+ Label loop, end;
+ cmpw(Ri, Rlen);
+ br(Assembler::GE, end);
+
+ bind(loop);
+ pre1(Ri);
+
+ block_comment(" for (j = i; j; j--) {"); {
+ movw(Rj, Ri);
+ unroll_2(Rj, &MontgomeryMultiplyGenerator::step);
+ } block_comment(" } // j");
+
+ post1();
+ addw(Ri, Ri, 1);
+ cmpw(Ri, Rlen);
+ br(Assembler::LT, loop);
+ bind(end);
+ block_comment("} // i");
+ }
+
+ block_comment("for (int i = len; i < 2*len; i++) {");
+ mov(Ri, Rlen); {
+ Label loop, end;
+ cmpw(Ri, Rlen, Assembler::LSL, 1);
+ br(Assembler::GE, end);
+
+ bind(loop);
+ pre2(Ri, Rlen);
+
+ block_comment(" for (j = len*2-i-1; j; j--) {"); {
+ lslw(Rj, Rlen, 1);
+ subw(Rj, Rj, Ri);
+ subw(Rj, Rj, 1);
+ unroll_2(Rj, &MontgomeryMultiplyGenerator::step);
+ } block_comment(" } // j");
+
+ post2(Ri, Rlen);
+ addw(Ri, Ri, 1);
+ cmpw(Ri, Rlen, Assembler::LSL, 1);
+ br(Assembler::LT, loop);
+ bind(end);
+ }
+ block_comment("} // i");
+
+ normalize(Rlen);
+
+ mov(Ra, Pm_base); // Save Pm_base in Ra
+ restore_regs(); // Restore caller's Pm_base
+
+ // Copy our result into caller's Pm_base
+ reverse(Pm_base, Ra, Rlen, t0, t1);
+
+ leave();
+ bind(nothing);
+ ret(lr);
+
+ return entry;
+ }
+ // In C, approximately:
+
+ // void
+ // montgomery_multiply(unsigned long Pa_base[], unsigned long Pb_base[],
+ // unsigned long Pn_base[], unsigned long Pm_base[],
+ // unsigned long inv, int len) {
+ // unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
+ // unsigned long *Pa, *Pb, *Pn, *Pm;
+ // unsigned long Ra, Rb, Rn, Rm;
+
+ // int i;
+
+ // assert(inv * Pn_base[0] == -1UL, "broken inverse in Montgomery multiply");
+
+ // for (i = 0; i < len; i++) {
+ // int j;
+
+ // Pa = Pa_base;
+ // Pb = Pb_base + i;
+ // Pm = Pm_base;
+ // Pn = Pn_base + i;
+
+ // Ra = *Pa;
+ // Rb = *Pb;
+ // Rm = *Pm;
+ // Rn = *Pn;
+
+ // int iters = i;
+ // for (j = 0; iters--; j++) {
+ // assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be");
+ // MACC(Ra, Rb, t0, t1, t2);
+ // Ra = *++Pa;
+ // Rb = *--Pb;
+ // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be");
+ // MACC(Rm, Rn, t0, t1, t2);
+ // Rm = *++Pm;
+ // Rn = *--Pn;
+ // }
+
+ // assert(Ra == Pa_base[i] && Rb == Pb_base[0], "must be");
+ // MACC(Ra, Rb, t0, t1, t2);
+ // *Pm = Rm = t0 * inv;
+ // assert(Rm == Pm_base[i] && Rn == Pn_base[0], "must be");
+ // MACC(Rm, Rn, t0, t1, t2);
+
+ // assert(t0 == 0, "broken Montgomery multiply");
+
+ // t0 = t1; t1 = t2; t2 = 0;
+ // }
+
+ // for (i = len; i < 2*len; i++) {
+ // int j;
+
+ // Pa = Pa_base + i-len;
+ // Pb = Pb_base + len;
+ // Pm = Pm_base + i-len;
+ // Pn = Pn_base + len;
+
+ // Ra = *++Pa;
+ // Rb = *--Pb;
+ // Rm = *++Pm;
+ // Rn = *--Pn;
+
+ // int iters = len*2-i-1;
+ // for (j = i-len+1; iters--; j++) {
+ // assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be");
+ // MACC(Ra, Rb, t0, t1, t2);
+ // Ra = *++Pa;
+ // Rb = *--Pb;
+ // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be");
+ // MACC(Rm, Rn, t0, t1, t2);
+ // Rm = *++Pm;
+ // Rn = *--Pn;
+ // }
+
+ // Pm_base[i-len] = t0;
+ // t0 = t1; t1 = t2; t2 = 0;
+ // }
+
+ // while (t0)
+ // t0 = sub(Pm_base, Pn_base, t0, len);
+ // }
+
+ /**
+ * Fast Montgomery squaring. This uses asymptotically 25% fewer
+ * multiplies than Montgomery multiplication so it should be up to
+ * 25% faster. However, its loop control is more complex and it
+ * may actually run slower on some machines.
+ *
+ * Arguments:
+ *
+ * Inputs:
+ * c_rarg0 - int array elements a
+ * c_rarg1 - int array elements n (the modulus)
+ * c_rarg2 - int length
+ * c_rarg3 - int inv
+ * c_rarg4 - int array elements m (the result)
+ *
+ */
+ address generate_square() {
+ Label argh;
+ bind(argh);
+ stop("MontgomeryMultiply total_allocation must be <= 8192");
+
+ align(CodeEntryAlignment);
+ address entry = pc();
+
+ enter();
+
+ // Make room.
+ cmpw(Rlen, 512);
+ br(Assembler::HI, argh);
+ sub(Ra, sp, Rlen, ext::uxtw, exact_log2(4 * sizeof (jint)));
+ andr(sp, Ra, -2 * wordSize);
+
+ lsrw(Rlen, Rlen, 1); // length in longwords = len/2
+
+ {
+ // Copy input args, reversing as we go. We use Ra as a
+ // temporary variable.
+ reverse(Ra, Pa_base, Rlen, t0, t1);
+ reverse(Ra, Pn_base, Rlen, t0, t1);
+ }
+
+ // Push all call-saved registers and also Pm_base which we'll need
+ // at the end.
+ save_regs();
+
+ mov(Pm_base, Ra);
+
+ mov(t0, zr);
+ mov(t1, zr);
+ mov(t2, zr);
+
+ block_comment("for (int i = 0; i < len; i++) {");
+ mov(Ri, zr); {
+ Label loop, end;
+ bind(loop);
+ cmp(Ri, Rlen);
+ br(Assembler::GE, end);
+
+ pre1(Ri);
+
+ block_comment("for (j = (i+1)/2; j; j--) {"); {
+ add(Rj, Ri, 1);
+ lsr(Rj, Rj, 1);
+ unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring);
+ } block_comment(" } // j");
+
+ last_squaring(Ri);
+
+ block_comment(" for (j = i/2; j; j--) {"); {
+ lsr(Rj, Ri, 1);
+ unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring);
+ } block_comment(" } // j");
+
+ post1_squaring();
+ add(Ri, Ri, 1);
+ cmp(Ri, Rlen);
+ br(Assembler::LT, loop);
+
+ bind(end);
+ block_comment("} // i");
+ }
+
+ block_comment("for (int i = len; i < 2*len; i++) {");
+ mov(Ri, Rlen); {
+ Label loop, end;
+ bind(loop);
+ cmp(Ri, Rlen, Assembler::LSL, 1);
+ br(Assembler::GE, end);
+
+ pre2(Ri, Rlen);
+
+ block_comment(" for (j = (2*len-i-1)/2; j; j--) {"); {
+ lsl(Rj, Rlen, 1);
+ sub(Rj, Rj, Ri);
+ sub(Rj, Rj, 1);
+ lsr(Rj, Rj, 1);
+ unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring);
+ } block_comment(" } // j");
+
+ last_squaring(Ri);
+
+ block_comment(" for (j = (2*len-i)/2; j; j--) {"); {
+ lsl(Rj, Rlen, 1);
+ sub(Rj, Rj, Ri);
+ lsr(Rj, Rj, 1);
+ unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring);
+ } block_comment(" } // j");
+
+ post2(Ri, Rlen);
+ add(Ri, Ri, 1);
+ cmp(Ri, Rlen, Assembler::LSL, 1);
+
+ br(Assembler::LT, loop);
+ bind(end);
+ block_comment("} // i");
+ }
+
+ normalize(Rlen);
+
+ mov(Ra, Pm_base); // Save Pm_base in Ra
+ restore_regs(); // Restore caller's Pm_base
+
+ // Copy our result into caller's Pm_base
+ reverse(Pm_base, Ra, Rlen, t0, t1);
+
+ leave();
+ ret(lr);
+
+ return entry;
+ }
+ // In C, approximately:
+
+ // void
+ // montgomery_square(unsigned long Pa_base[], unsigned long Pn_base[],
+ // unsigned long Pm_base[], unsigned long inv, int len) {
+ // unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
+ // unsigned long *Pa, *Pb, *Pn, *Pm;
+ // unsigned long Ra, Rb, Rn, Rm;
+
+ // int i;
+
+ // assert(inv * Pn_base[0] == -1UL, "broken inverse in Montgomery multiply");
+
+ // for (i = 0; i < len; i++) {
+ // int j;
+
+ // Pa = Pa_base;
+ // Pb = Pa_base + i;
+ // Pm = Pm_base;
+ // Pn = Pn_base + i;
+
+ // Ra = *Pa;
+ // Rb = *Pb;
+ // Rm = *Pm;
+ // Rn = *Pn;
+
+ // int iters = (i+1)/2;
+ // for (j = 0; iters--; j++) {
+ // assert(Ra == Pa_base[j] && Rb == Pa_base[i-j], "must be");
+ // MACC2(Ra, Rb, t0, t1, t2);
+ // Ra = *++Pa;
+ // Rb = *--Pb;
+ // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be");
+ // MACC(Rm, Rn, t0, t1, t2);
+ // Rm = *++Pm;
+ // Rn = *--Pn;
+ // }
+ // if ((i & 1) == 0) {
+ // assert(Ra == Pa_base[j], "must be");
+ // MACC(Ra, Ra, t0, t1, t2);
+ // }
+ // iters = i/2;
+ // assert(iters == i-j, "must be");
+ // for (; iters--; j++) {
+ // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be");
+ // MACC(Rm, Rn, t0, t1, t2);
+ // Rm = *++Pm;
+ // Rn = *--Pn;
+ // }
+
+ // *Pm = Rm = t0 * inv;
+ // assert(Rm == Pm_base[i] && Rn == Pn_base[0], "must be");
+ // MACC(Rm, Rn, t0, t1, t2);
+
+ // assert(t0 == 0, "broken Montgomery multiply");
+
+ // t0 = t1; t1 = t2; t2 = 0;
+ // }
+
+ // for (i = len; i < 2*len; i++) {
+ // int start = i-len+1;
+ // int end = start + (len - start)/2;
+ // int j;
+
+ // Pa = Pa_base + i-len;
+ // Pb = Pa_base + len;
+ // Pm = Pm_base + i-len;
+ // Pn = Pn_base + len;
+
+ // Ra = *++Pa;
+ // Rb = *--Pb;
+ // Rm = *++Pm;
+ // Rn = *--Pn;
+
+ // int iters = (2*len-i-1)/2;
+ // assert(iters == end-start, "must be");
+ // for (j = start; iters--; j++) {
+ // assert(Ra == Pa_base[j] && Rb == Pa_base[i-j], "must be");
+ // MACC2(Ra, Rb, t0, t1, t2);
+ // Ra = *++Pa;
+ // Rb = *--Pb;
+ // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be");
+ // MACC(Rm, Rn, t0, t1, t2);
+ // Rm = *++Pm;
+ // Rn = *--Pn;
+ // }
+ // if ((i & 1) == 0) {
+ // assert(Ra == Pa_base[j], "must be");
+ // MACC(Ra, Ra, t0, t1, t2);
+ // }
+ // iters = (2*len-i)/2;
+ // assert(iters == len-j, "must be");
+ // for (; iters--; j++) {
+ // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be");
+ // MACC(Rm, Rn, t0, t1, t2);
+ // Rm = *++Pm;
+ // Rn = *--Pn;
+ // }
+ // Pm_base[i-len] = t0;
+ // t0 = t1; t1 = t2; t2 = 0;
+ // }
+
+ // while (t0)
+ // t0 = sub(Pm_base, Pn_base, t0, len);
+ // }
+ };
+
// Initialization
void generate_initial() {
// Generate initial stubs and initializes the entry points
@@ -2603,7 +3554,26 @@
StubRoutines::_multiplyToLen = generate_multiplyToLen();
}
+ if (UseMontgomeryMultiplyIntrinsic) {
+ StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply");
+ MontgomeryMultiplyGenerator g(_masm, /*squaring*/false);
+ StubRoutines::_montgomeryMultiply = g.generate_multiply();
+ }
+
+ if (UseMontgomerySquareIntrinsic) {
+ StubCodeMark mark(this, "StubRoutines", "montgomerySquare");
+ MontgomeryMultiplyGenerator g(_masm, /*squaring*/true);
+ // We use generate_multiply() rather than generate_square()
+ // because it's faster for the sizes of modulus we care about.
+ StubRoutines::_montgomerySquare = g.generate_multiply();
+ }
+
#ifndef BUILTIN_SIM
+ // generate GHASH intrinsics code
+ if (UseGHASHIntrinsics) {
+ StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
+ }
+
if (UseAESIntrinsics) {
StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
--- a/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp Mon Jul 27 13:56:26 2015 -0700
+++ b/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp Fri Jul 31 12:13:57 2015 +0200
@@ -45,6 +45,10 @@
#define HWCAP_AES (1<<3)
#endif
+#ifndef HWCAP_PMULL
+#define HWCAP_PMULL (1<<4)
+#endif
+
#ifndef HWCAP_SHA1
#define HWCAP_SHA1 (1<<5)
#endif
@@ -190,11 +194,6 @@
}
}
- if (UseGHASHIntrinsics) {
- warning("GHASH intrinsics are not available on this CPU");
- FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
- }
-
if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
UseCRC32Intrinsics = true;
}
@@ -232,7 +231,7 @@
}
} else if (UseSHA256Intrinsics) {
warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
- FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
+ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
}
if (UseSHA512Intrinsics) {
@@ -244,6 +243,15 @@
FLAG_SET_DEFAULT(UseSHA, false);
}
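+ // GHASH intrinsics require the ARMv8 PMULL (carry-less polynomial multiply) extension.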
+ if (auxv & HWCAP_PMULL) {
+ if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
+ FLAG_SET_DEFAULT(UseGHASHIntrinsics, true);
+ }
+ } else if (UseGHASHIntrinsics) {
+ warning("GHASH intrinsics are not available on this CPU");
+ FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
+ }
+
// This machine allows unaligned memory accesses
if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
@@ -261,6 +269,13 @@
UsePopCountInstruction = true;
}
+ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
+ UseMontgomeryMultiplyIntrinsic = true;
+ }
+ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
+ UseMontgomerySquareIntrinsic = true;
+ }
+
#ifdef COMPILER2
if (FLAG_IS_DEFAULT(OptoScheduling)) {
OptoScheduling = true;
--- a/hotspot/src/share/vm/c1/c1_Compiler.cpp Mon Jul 27 13:56:26 2015 -0700
+++ b/hotspot/src/share/vm/c1/c1_Compiler.cpp Fri Jul 31 12:13:57 2015 +0200
@@ -99,6 +99,164 @@
return buffer_blob;
}
+bool Compiler::is_intrinsic_supported(methodHandle method) {
+ vmIntrinsics::ID id = method->intrinsic_id();
+ assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
+
+ if (method->is_synchronized()) {
+ // C1 does not support intrinsification of synchronized methods.
+ return false;
+ }
+
+ switch (id) {
+ case vmIntrinsics::_compareAndSwapLong:
+ if (!VM_Version::supports_cx8()) return false;
+ break;
+ case vmIntrinsics::_getAndAddInt:
+ if (!VM_Version::supports_atomic_getadd4()) return false;
+ break;
+ case vmIntrinsics::_getAndAddLong:
+ if (!VM_Version::supports_atomic_getadd8()) return false;
+ break;
+ case vmIntrinsics::_getAndSetInt:
+ if (!VM_Version::supports_atomic_getset4()) return false;
+ break;
+ case vmIntrinsics::_getAndSetLong:
+ if (!VM_Version::supports_atomic_getset8()) return false;
+ break;
+ case vmIntrinsics::_getAndSetObject:
+#ifdef _LP64
+ if (!UseCompressedOops && !VM_Version::supports_atomic_getset8()) return false;
+ if (UseCompressedOops && !VM_Version::supports_atomic_getset4()) return false;
+#else
+ if (!VM_Version::supports_atomic_getset4()) return false;
+#endif
+ break;
+ case vmIntrinsics::_arraycopy:
+ case vmIntrinsics::_currentTimeMillis:
+ case vmIntrinsics::_nanoTime:
+ case vmIntrinsics::_Reference_get:
+ // Use the intrinsic version of Reference.get() so that the value in
+ // the referent field can be registered by the G1 pre-barrier code.
+ // Also to prevent commoning reads from this field across safepoint
+ // since GC can change its value.
+ case vmIntrinsics::_loadFence:
+ case vmIntrinsics::_storeFence:
+ case vmIntrinsics::_fullFence:
+ case vmIntrinsics::_floatToRawIntBits:
+ case vmIntrinsics::_intBitsToFloat:
+ case vmIntrinsics::_doubleToRawLongBits:
+ case vmIntrinsics::_longBitsToDouble:
+ case vmIntrinsics::_getClass:
+ case vmIntrinsics::_isInstance:
+ case vmIntrinsics::_currentThread:
+ case vmIntrinsics::_dabs:
+ case vmIntrinsics::_dsqrt:
+ case vmIntrinsics::_dsin:
+ case vmIntrinsics::_dcos:
+ case vmIntrinsics::_dtan:
+ case vmIntrinsics::_dlog:
+ case vmIntrinsics::_dlog10:
+ case vmIntrinsics::_dexp:
+ case vmIntrinsics::_dpow:
+ case vmIntrinsics::_getObject:
+ case vmIntrinsics::_getBoolean:
+ case vmIntrinsics::_getByte:
+ case vmIntrinsics::_getShort:
+ case vmIntrinsics::_getChar:
+ case vmIntrinsics::_getInt:
+ case vmIntrinsics::_getLong:
+ case vmIntrinsics::_getFloat:
+ case vmIntrinsics::_getDouble:
+ case vmIntrinsics::_putObject:
+ case vmIntrinsics::_putBoolean:
+ case vmIntrinsics::_putByte:
+ case vmIntrinsics::_putShort:
+ case vmIntrinsics::_putChar:
+ case vmIntrinsics::_putInt:
+ case vmIntrinsics::_putLong:
+ case vmIntrinsics::_putFloat:
+ case vmIntrinsics::_putDouble:
+ case vmIntrinsics::_getObjectVolatile:
+ case vmIntrinsics::_getBooleanVolatile:
+ case vmIntrinsics::_getByteVolatile:
+ case vmIntrinsics::_getShortVolatile:
+ case vmIntrinsics::_getCharVolatile:
+ case vmIntrinsics::_getIntVolatile:
+ case vmIntrinsics::_getLongVolatile:
+ case vmIntrinsics::_getFloatVolatile:
+ case vmIntrinsics::_getDoubleVolatile:
+ case vmIntrinsics::_putObjectVolatile:
+ case vmIntrinsics::_putBooleanVolatile:
+ case vmIntrinsics::_putByteVolatile:
+ case vmIntrinsics::_putShortVolatile:
+ case vmIntrinsics::_putCharVolatile:
+ case vmIntrinsics::_putIntVolatile:
+ case vmIntrinsics::_putLongVolatile:
+ case vmIntrinsics::_putFloatVolatile:
+ case vmIntrinsics::_putDoubleVolatile:
+ case vmIntrinsics::_getByte_raw:
+ case vmIntrinsics::_getShort_raw:
+ case vmIntrinsics::_getChar_raw:
+ case vmIntrinsics::_getInt_raw:
+ case vmIntrinsics::_getLong_raw:
+ case vmIntrinsics::_getFloat_raw:
+ case vmIntrinsics::_getDouble_raw:
+ case vmIntrinsics::_putByte_raw:
+ case vmIntrinsics::_putShort_raw:
+ case vmIntrinsics::_putChar_raw:
+ case vmIntrinsics::_putInt_raw:
+ case vmIntrinsics::_putLong_raw:
+ case vmIntrinsics::_putFloat_raw:
+ case vmIntrinsics::_putDouble_raw:
+ case vmIntrinsics::_putOrderedObject:
+ case vmIntrinsics::_putOrderedInt:
+ case vmIntrinsics::_putOrderedLong:
+ case vmIntrinsics::_getShortUnaligned:
+ case vmIntrinsics::_getCharUnaligned:
+ case vmIntrinsics::_getIntUnaligned:
+ case vmIntrinsics::_getLongUnaligned:
+ case vmIntrinsics::_putShortUnaligned:
+ case vmIntrinsics::_putCharUnaligned:
+ case vmIntrinsics::_putIntUnaligned:
+ case vmIntrinsics::_putLongUnaligned:
+ case vmIntrinsics::_checkIndex:
+ case vmIntrinsics::_updateCRC32:
+ case vmIntrinsics::_updateBytesCRC32:
+ case vmIntrinsics::_updateByteBufferCRC32:
+ case vmIntrinsics::_compareAndSwapInt:
+ case vmIntrinsics::_compareAndSwapObject:
+#ifdef TRACE_HAVE_INTRINSICS
+ case vmIntrinsics::_classID:
+ case vmIntrinsics::_threadID:
+ case vmIntrinsics::_counterTime:
+#endif
+ break;
+ default:
+ return false; // Intrinsics not on the previous list are not available.
+ }
+
+ return true;
+}
+
+bool Compiler::is_intrinsic_disabled_by_flag(methodHandle method) {
+ vmIntrinsics::ID id = method->intrinsic_id();
+ assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
+
+ if (vmIntrinsics::is_disabled_by_flags(id)) {
+ return true;
+ }
+
+ if (!InlineNatives && id != vmIntrinsics::_Reference_get) {
+ return true;
+ }
+
+ if (!InlineClassNatives && id == vmIntrinsics::_getClass) {
+ return true;
+ }
+
+ return false;
+}
void Compiler::compile_method(ciEnv* env, ciMethod* method, int entry_bci) {
BufferBlob* buffer_blob = CompilerThread::current()->get_buffer_blob();
@@ -117,3 +275,7 @@
void Compiler::print_timers() {
Compilation::print_timers();
}
+
+bool Compiler::is_intrinsic_available(methodHandle method, methodHandle compilation_context) {
+ return is_intrinsic_supported(method) && !is_intrinsic_disabled_by_flag(method);
+}
--- a/hotspot/src/share/vm/c1/c1_Compiler.hpp Mon Jul 27 13:56:26 2015 -0700
+++ b/hotspot/src/share/vm/c1/c1_Compiler.hpp Fri Jul 31 12:13:57 2015 +0200
@@ -55,6 +55,18 @@
// Print compilation timers and statistics
virtual void print_timers();
+ // Check the availability of an intrinsic for 'method' given a compilation context.
+ // The compilation context is needed to support per-method usage of the
+ // DisableIntrinsic flag. However, as C1 ignores the DisableIntrinsic flag, it
+ // ignores the compilation context.
+ virtual bool is_intrinsic_available(methodHandle method, methodHandle compilation_context);
+
+ // Check if the C1 compiler supports an intrinsic for 'method'.
+ virtual bool is_intrinsic_supported(methodHandle method);
+
+ // Processing of command-line flags specific to the C1 compiler.
+ virtual bool is_intrinsic_disabled_by_flag(methodHandle method);
+
// Size of the code buffer
static int code_buffer_size();
};
--- a/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp Mon Jul 27 13:56:26 2015 -0700
+++ b/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp Fri Jul 31 12:13:57 2015 +0200
@@ -3372,231 +3372,85 @@
return NULL;
}
-
-bool GraphBuilder::try_inline_intrinsics(ciMethod* callee) {
- if (callee->is_synchronized()) {
- // We don't currently support any synchronized intrinsics
- return false;
- }
-
- // callee seems like a good candidate
- // determine id
+void GraphBuilder::build_graph_for_intrinsic(ciMethod* callee) {
vmIntrinsics::ID id = callee->intrinsic_id();
- if (!InlineNatives && id != vmIntrinsics::_Reference_get) {
- // InlineNatives does not control Reference.get
- INLINE_BAILOUT("intrinsic method inlining disabled");
+ assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
+
+ // Some intrinsics need special IR nodes.
+ switch(id) {
+ case vmIntrinsics::_getObject : append_unsafe_get_obj(callee, T_OBJECT, false); return;
+ case vmIntrinsics::_getBoolean : append_unsafe_get_obj(callee, T_BOOLEAN, false); return;
+ case vmIntrinsics::_getByte : append_unsafe_get_obj(callee, T_BYTE, false); return;
+ case vmIntrinsics::_getShort : append_unsafe_get_obj(callee, T_SHORT, false); return;
+ case vmIntrinsics::_getChar : append_unsafe_get_obj(callee, T_CHAR, false); return;
+ case vmIntrinsics::_getInt : append_unsafe_get_obj(callee, T_INT, false); return;
+ case vmIntrinsics::_getLong : append_unsafe_get_obj(callee, T_LONG, false); return;
+ case vmIntrinsics::_getFloat : append_unsafe_get_obj(callee, T_FLOAT, false); return;
+ case vmIntrinsics::_getDouble : append_unsafe_get_obj(callee, T_DOUBLE, false); return;
+ case vmIntrinsics::_putObject : append_unsafe_put_obj(callee, T_OBJECT, false); return;
+ case vmIntrinsics::_putBoolean : append_unsafe_put_obj(callee, T_BOOLEAN, false); return;
+ case vmIntrinsics::_putByte : append_unsafe_put_obj(callee, T_BYTE, false); return;
+ case vmIntrinsics::_putShort : append_unsafe_put_obj(callee, T_SHORT, false); return;
+ case vmIntrinsics::_putChar : append_unsafe_put_obj(callee, T_CHAR, false); return;
+ case vmIntrinsics::_putInt : append_unsafe_put_obj(callee, T_INT, false); return;
+ case vmIntrinsics::_putLong : append_unsafe_put_obj(callee, T_LONG, false); return;
+ case vmIntrinsics::_putFloat : append_unsafe_put_obj(callee, T_FLOAT, false); return;
+ case vmIntrinsics::_putDouble : append_unsafe_put_obj(callee, T_DOUBLE, false); return;
+ case vmIntrinsics::_getShortUnaligned : append_unsafe_get_obj(callee, T_SHORT, false); return;
+ case vmIntrinsics::_getCharUnaligned : append_unsafe_get_obj(callee, T_CHAR, false); return;
+ case vmIntrinsics::_getIntUnaligned : append_unsafe_get_obj(callee, T_INT, false); return;
+ case vmIntrinsics::_getLongUnaligned : append_unsafe_get_obj(callee, T_LONG, false); return;
+ case vmIntrinsics::_putShortUnaligned : append_unsafe_put_obj(callee, T_SHORT, false); return;
+ case vmIntrinsics::_putCharUnaligned : append_unsafe_put_obj(callee, T_CHAR, false); return;
+ case vmIntrinsics::_putIntUnaligned : append_unsafe_put_obj(callee, T_INT, false); return;
+ case vmIntrinsics::_putLongUnaligned : append_unsafe_put_obj(callee, T_LONG, false); return;
+ case vmIntrinsics::_getObjectVolatile : append_unsafe_get_obj(callee, T_OBJECT, true); return;
+ case vmIntrinsics::_getBooleanVolatile : append_unsafe_get_obj(callee, T_BOOLEAN, true); return;
+ case vmIntrinsics::_getByteVolatile : append_unsafe_get_obj(callee, T_BYTE, true); return;
+ case vmIntrinsics::_getShortVolatile : append_unsafe_get_obj(callee, T_SHORT, true); return;
+ case vmIntrinsics::_getCharVolatile : append_unsafe_get_obj(callee, T_CHAR, true); return;
+ case vmIntrinsics::_getIntVolatile : append_unsafe_get_obj(callee, T_INT, true); return;
+ case vmIntrinsics::_getLongVolatile : append_unsafe_get_obj(callee, T_LONG, true); return;
+ case vmIntrinsics::_getFloatVolatile : append_unsafe_get_obj(callee, T_FLOAT, true); return;
+ case vmIntrinsics::_getDoubleVolatile : append_unsafe_get_obj(callee, T_DOUBLE, true); return;
+ case vmIntrinsics::_putObjectVolatile : append_unsafe_put_obj(callee, T_OBJECT, true); return;
+ case vmIntrinsics::_putBooleanVolatile : append_unsafe_put_obj(callee, T_BOOLEAN, true); return;
+ case vmIntrinsics::_putByteVolatile : append_unsafe_put_obj(callee, T_BYTE, true); return;
+ case vmIntrinsics::_putShortVolatile : append_unsafe_put_obj(callee, T_SHORT, true); return;
+ case vmIntrinsics::_putCharVolatile : append_unsafe_put_obj(callee, T_CHAR, true); return;
+ case vmIntrinsics::_putIntVolatile : append_unsafe_put_obj(callee, T_INT, true); return;
+ case vmIntrinsics::_putLongVolatile : append_unsafe_put_obj(callee, T_LONG, true); return;
+ case vmIntrinsics::_putFloatVolatile : append_unsafe_put_obj(callee, T_FLOAT, true); return;
+ case vmIntrinsics::_putDoubleVolatile : append_unsafe_put_obj(callee, T_DOUBLE, true); return;
+ case vmIntrinsics::_getByte_raw : append_unsafe_get_raw(callee, T_BYTE ); return;
+ case vmIntrinsics::_getShort_raw : append_unsafe_get_raw(callee, T_SHORT ); return;
+ case vmIntrinsics::_getChar_raw : append_unsafe_get_raw(callee, T_CHAR ); return;
+ case vmIntrinsics::_getInt_raw : append_unsafe_get_raw(callee, T_INT ); return;
+ case vmIntrinsics::_getLong_raw : append_unsafe_get_raw(callee, T_LONG ); return;
+ case vmIntrinsics::_getFloat_raw : append_unsafe_get_raw(callee, T_FLOAT ); return;
+ case vmIntrinsics::_getDouble_raw : append_unsafe_get_raw(callee, T_DOUBLE); return;
+ case vmIntrinsics::_putByte_raw : append_unsafe_put_raw(callee, T_BYTE ); return;
+ case vmIntrinsics::_putShort_raw : append_unsafe_put_raw(callee, T_SHORT ); return;
+ case vmIntrinsics::_putChar_raw : append_unsafe_put_raw(callee, T_CHAR ); return;
+ case vmIntrinsics::_putInt_raw : append_unsafe_put_raw(callee, T_INT ); return;
+ case vmIntrinsics::_putLong_raw : append_unsafe_put_raw(callee, T_LONG ); return;
+ case vmIntrinsics::_putFloat_raw : append_unsafe_put_raw(callee, T_FLOAT ); return;
+ case vmIntrinsics::_putDouble_raw : append_unsafe_put_raw(callee, T_DOUBLE); return;
+ case vmIntrinsics::_putOrderedObject : append_unsafe_put_obj(callee, T_OBJECT, true); return;
+ case vmIntrinsics::_putOrderedInt : append_unsafe_put_obj(callee, T_INT, true); return;
+ case vmIntrinsics::_putOrderedLong : append_unsafe_put_obj(callee, T_LONG, true); return;
+ case vmIntrinsics::_compareAndSwapLong:
+ case vmIntrinsics::_compareAndSwapInt:
+ case vmIntrinsics::_compareAndSwapObject: append_unsafe_CAS(callee); return;
+ case vmIntrinsics::_getAndAddInt:
+ case vmIntrinsics::_getAndAddLong : append_unsafe_get_and_set_obj(callee, true); return;
+ case vmIntrinsics::_getAndSetInt :
+ case vmIntrinsics::_getAndSetLong :
+ case vmIntrinsics::_getAndSetObject : append_unsafe_get_and_set_obj(callee, false); return;
+ default:
+ break;
}
- bool preserves_state = false;
- bool cantrap = true;
- switch (id) {
- case vmIntrinsics::_arraycopy:
- if (!InlineArrayCopy) return false;
- break;
-
-#ifdef TRACE_HAVE_INTRINSICS
- case vmIntrinsics::_classID:
- case vmIntrinsics::_threadID:
- preserves_state = true;
- cantrap = true;
- break;
-
- case vmIntrinsics::_counterTime:
- preserves_state = true;
- cantrap = false;
- break;
-#endif
-
- case vmIntrinsics::_currentTimeMillis:
- case vmIntrinsics::_nanoTime:
- preserves_state = true;
- cantrap = false;
- break;
-
- case vmIntrinsics::_floatToRawIntBits :
- case vmIntrinsics::_intBitsToFloat :
- case vmIntrinsics::_doubleToRawLongBits :
- case vmIntrinsics::_longBitsToDouble :
- if (!InlineMathNatives) return false;
- preserves_state = true;
- cantrap = false;
- break;
-
- case vmIntrinsics::_getClass :
- case vmIntrinsics::_isInstance :
- if (!InlineClassNatives) return false;
- preserves_state = true;
- break;
-
- case vmIntrinsics::_currentThread :
- if (!InlineThreadNatives) return false;
- preserves_state = true;
- cantrap = false;
- break;
-
- case vmIntrinsics::_dabs : // fall through
- case vmIntrinsics::_dsqrt : // fall through
- case vmIntrinsics::_dsin : // fall through
- case vmIntrinsics::_dcos : // fall through
- case vmIntrinsics::_dtan : // fall through
- case vmIntrinsics::_dlog : // fall through
- case vmIntrinsics::_dlog10 : // fall through
- case vmIntrinsics::_dexp : // fall through
- case vmIntrinsics::_dpow : // fall through
- if (!InlineMathNatives) return false;
- cantrap = false;
- preserves_state = true;
- break;
-
- // Use special nodes for Unsafe instructions so we can more easily
- // perform an address-mode optimization on the raw variants
- case vmIntrinsics::_getObject : return append_unsafe_get_obj(callee, T_OBJECT, false);
- case vmIntrinsics::_getBoolean: return append_unsafe_get_obj(callee, T_BOOLEAN, false);
- case vmIntrinsics::_getByte : return append_unsafe_get_obj(callee, T_BYTE, false);
- case vmIntrinsics::_getShort : return append_unsafe_get_obj(callee, T_SHORT, false);
- case vmIntrinsics::_getChar : return append_unsafe_get_obj(callee, T_CHAR, false);
- case vmIntrinsics::_getInt : return append_unsafe_get_obj(callee, T_INT, false);
- case vmIntrinsics::_getLong : return append_unsafe_get_obj(callee, T_LONG, false);
- case vmIntrinsics::_getFloat : return append_unsafe_get_obj(callee, T_FLOAT, false);
- case vmIntrinsics::_getDouble : return append_unsafe_get_obj(callee, T_DOUBLE, false);
-
- case vmIntrinsics::_putObject : return append_unsafe_put_obj(callee, T_OBJECT, false);
- case vmIntrinsics::_putBoolean: return append_unsafe_put_obj(callee, T_BOOLEAN, false);
- case vmIntrinsics::_putByte : return append_unsafe_put_obj(callee, T_BYTE, false);
- case vmIntrinsics::_putShort : return append_unsafe_put_obj(callee, T_SHORT, false);
- case vmIntrinsics::_putChar : return append_unsafe_put_obj(callee, T_CHAR, false);
- case vmIntrinsics::_putInt : return append_unsafe_put_obj(callee, T_INT, false);
- case vmIntrinsics::_putLong : return append_unsafe_put_obj(callee, T_LONG, false);
- case vmIntrinsics::_putFloat : return append_unsafe_put_obj(callee, T_FLOAT, false);
- case vmIntrinsics::_putDouble : return append_unsafe_put_obj(callee, T_DOUBLE, false);
-
- case vmIntrinsics::_getShortUnaligned :
- return UseUnalignedAccesses ? append_unsafe_get_obj(callee, T_SHORT, false) : false;
- case vmIntrinsics::_getCharUnaligned :
- return UseUnalignedAccesses ? append_unsafe_get_obj(callee, T_CHAR, false) : false;
- case vmIntrinsics::_getIntUnaligned :
- return UseUnalignedAccesses ? append_unsafe_get_obj(callee, T_INT, false) : false;
- case vmIntrinsics::_getLongUnaligned :
- return UseUnalignedAccesses ? append_unsafe_get_obj(callee, T_LONG, false) : false;
-
- case vmIntrinsics::_putShortUnaligned :
- return UseUnalignedAccesses ? append_unsafe_put_obj(callee, T_SHORT, false) : false;
- case vmIntrinsics::_putCharUnaligned :
- return UseUnalignedAccesses ? append_unsafe_put_obj(callee, T_CHAR, false) : false;
- case vmIntrinsics::_putIntUnaligned :
- return UseUnalignedAccesses ? append_unsafe_put_obj(callee, T_INT, false) : false;
- case vmIntrinsics::_putLongUnaligned :
- return UseUnalignedAccesses ? append_unsafe_put_obj(callee, T_LONG, false) : false;
-
- case vmIntrinsics::_getObjectVolatile : return append_unsafe_get_obj(callee, T_OBJECT, true);
- case vmIntrinsics::_getBooleanVolatile: return append_unsafe_get_obj(callee, T_BOOLEAN, true);
- case vmIntrinsics::_getByteVolatile : return append_unsafe_get_obj(callee, T_BYTE, true);
- case vmIntrinsics::_getShortVolatile : return append_unsafe_get_obj(callee, T_SHORT, true);
- case vmIntrinsics::_getCharVolatile : return append_unsafe_get_obj(callee, T_CHAR, true);
- case vmIntrinsics::_getIntVolatile : return append_unsafe_get_obj(callee, T_INT, true);
- case vmIntrinsics::_getLongVolatile : return append_unsafe_get_obj(callee, T_LONG, true);
- case vmIntrinsics::_getFloatVolatile : return append_unsafe_get_obj(callee, T_FLOAT, true);
- case vmIntrinsics::_getDoubleVolatile : return append_unsafe_get_obj(callee, T_DOUBLE, true);
-
- case vmIntrinsics::_putObjectVolatile : return append_unsafe_put_obj(callee, T_OBJECT, true);
- case vmIntrinsics::_putBooleanVolatile: return append_unsafe_put_obj(callee, T_BOOLEAN, true);
- case vmIntrinsics::_putByteVolatile : return append_unsafe_put_obj(callee, T_BYTE, true);
- case vmIntrinsics::_putShortVolatile : return append_unsafe_put_obj(callee, T_SHORT, true);
- case vmIntrinsics::_putCharVolatile : return append_unsafe_put_obj(callee, T_CHAR, true);
- case vmIntrinsics::_putIntVolatile : return append_unsafe_put_obj(callee, T_INT, true);
- case vmIntrinsics::_putLongVolatile : return append_unsafe_put_obj(callee, T_LONG, true);
- case vmIntrinsics::_putFloatVolatile : return append_unsafe_put_obj(callee, T_FLOAT, true);
- case vmIntrinsics::_putDoubleVolatile : return append_unsafe_put_obj(callee, T_DOUBLE, true);
-
- case vmIntrinsics::_getByte_raw : return append_unsafe_get_raw(callee, T_BYTE);
- case vmIntrinsics::_getShort_raw : return append_unsafe_get_raw(callee, T_SHORT);
- case vmIntrinsics::_getChar_raw : return append_unsafe_get_raw(callee, T_CHAR);
- case vmIntrinsics::_getInt_raw : return append_unsafe_get_raw(callee, T_INT);
- case vmIntrinsics::_getLong_raw : return append_unsafe_get_raw(callee, T_LONG);
- case vmIntrinsics::_getFloat_raw : return append_unsafe_get_raw(callee, T_FLOAT);
- case vmIntrinsics::_getDouble_raw : return append_unsafe_get_raw(callee, T_DOUBLE);
-
- case vmIntrinsics::_putByte_raw : return append_unsafe_put_raw(callee, T_BYTE);
- case vmIntrinsics::_putShort_raw : return append_unsafe_put_raw(callee, T_SHORT);
- case vmIntrinsics::_putChar_raw : return append_unsafe_put_raw(callee, T_CHAR);
- case vmIntrinsics::_putInt_raw : return append_unsafe_put_raw(callee, T_INT);
- case vmIntrinsics::_putLong_raw : return append_unsafe_put_raw(callee, T_LONG);
- case vmIntrinsics::_putFloat_raw : return append_unsafe_put_raw(callee, T_FLOAT);
- case vmIntrinsics::_putDouble_raw : return append_unsafe_put_raw(callee, T_DOUBLE);
-
- case vmIntrinsics::_checkIndex :
- if (!InlineNIOCheckIndex) return false;
- preserves_state = true;
- break;
- case vmIntrinsics::_putOrderedObject : return append_unsafe_put_obj(callee, T_OBJECT, true);
- case vmIntrinsics::_putOrderedInt : return append_unsafe_put_obj(callee, T_INT, true);
- case vmIntrinsics::_putOrderedLong : return append_unsafe_put_obj(callee, T_LONG, true);
-
- case vmIntrinsics::_compareAndSwapLong:
- if (!VM_Version::supports_cx8()) return false;
- // fall through
- case vmIntrinsics::_compareAndSwapInt:
- case vmIntrinsics::_compareAndSwapObject:
- append_unsafe_CAS(callee);
- return true;
-
- case vmIntrinsics::_getAndAddInt:
- if (!VM_Version::supports_atomic_getadd4()) {
- return false;
- }
- return append_unsafe_get_and_set_obj(callee, true);
- case vmIntrinsics::_getAndAddLong:
- if (!VM_Version::supports_atomic_getadd8()) {
- return false;
- }
- return append_unsafe_get_and_set_obj(callee, true);
- case vmIntrinsics::_getAndSetInt:
- if (!VM_Version::supports_atomic_getset4()) {
- return false;
- }
- return append_unsafe_get_and_set_obj(callee, false);
- case vmIntrinsics::_getAndSetLong:
- if (!VM_Version::supports_atomic_getset8()) {
- return false;
- }
- return append_unsafe_get_and_set_obj(callee, false);
- case vmIntrinsics::_getAndSetObject:
-#ifdef _LP64
- if (!UseCompressedOops && !VM_Version::supports_atomic_getset8()) {
- return false;
- }
- if (UseCompressedOops && !VM_Version::supports_atomic_getset4()) {
- return false;
- }
-#else
- if (!VM_Version::supports_atomic_getset4()) {
- return false;
- }
-#endif
- return append_unsafe_get_and_set_obj(callee, false);
-
- case vmIntrinsics::_Reference_get:
- // Use the intrinsic version of Reference.get() so that the value in
- // the referent field can be registered by the G1 pre-barrier code.
- // Also to prevent commoning reads from this field across safepoint
- // since GC can change its value.
- preserves_state = true;
- break;
-
- case vmIntrinsics::_updateCRC32:
- case vmIntrinsics::_updateBytesCRC32:
- case vmIntrinsics::_updateByteBufferCRC32:
- if (!UseCRC32Intrinsics) return false;
- cantrap = false;
- preserves_state = true;
- break;
-
- case vmIntrinsics::_loadFence :
- case vmIntrinsics::_storeFence:
- case vmIntrinsics::_fullFence :
- break;
-
- default : return false; // do not inline
- }
+
// create intrinsic node
const bool has_receiver = !callee->is_static();
ValueType* result_type = as_ValueType(callee->return_type());
@@ -3621,8 +3475,10 @@
}
}
- Intrinsic* result = new Intrinsic(result_type, id, args, has_receiver, state_before,
- preserves_state, cantrap);
+ Intrinsic* result = new Intrinsic(result_type, callee->intrinsic_id(),
+ args, has_receiver, state_before,
+ vmIntrinsics::preserves_state(id),
+ vmIntrinsics::can_trap(id));
// append instruction & push result
Value value = append_split(result);
if (result_type != voidType) push(result_type, value);
@@ -3630,8 +3486,22 @@
if (callee != method() && profile_return() && result_type->is_object_kind()) {
profile_return_type(result, callee);
}
-
- // done
+}
+
+bool GraphBuilder::try_inline_intrinsics(ciMethod* callee) {
+ // Calling is_intrinsic_available() does not require a transition to the
+ // '_thread_in_vm' state because is_intrinsic_available() does not access
+ // critical VM-internal data.
+ if (!_compilation->compiler()->is_intrinsic_available(callee->get_Method(), NULL)) {
+ if (!InlineNatives) {
+ // Return false and also set a message indicating that inlining of
+ // intrinsics has been disabled in general.
+ INLINE_BAILOUT("intrinsic method inlining disabled");
+ } else {
+ return false;
+ }
+ }
+ build_graph_for_intrinsic(callee);
return true;
}
@@ -4224,58 +4094,46 @@
_scope_data = scope_data()->parent();
}
-bool GraphBuilder::append_unsafe_get_obj(ciMethod* callee, BasicType t, bool is_volatile) {
- if (InlineUnsafeOps) {
- Values* args = state()->pop_arguments(callee->arg_size());
- null_check(args->at(0));
- Instruction* offset = args->at(2);
+void GraphBuilder::append_unsafe_get_obj(ciMethod* callee, BasicType t, bool is_volatile) {
+ Values* args = state()->pop_arguments(callee->arg_size());
+ null_check(args->at(0));
+ Instruction* offset = args->at(2);
#ifndef _LP64
- offset = append(new Convert(Bytecodes::_l2i, offset, as_ValueType(T_INT)));
+ offset = append(new Convert(Bytecodes::_l2i, offset, as_ValueType(T_INT)));
#endif
- Instruction* op = append(new UnsafeGetObject(t, args->at(1), offset, is_volatile));
- push(op->type(), op);
- compilation()->set_has_unsafe_access(true);
- }
- return InlineUnsafeOps;
+ Instruction* op = append(new UnsafeGetObject(t, args->at(1), offset, is_volatile));
+ push(op->type(), op);
+ compilation()->set_has_unsafe_access(true);
}
-bool GraphBuilder::append_unsafe_put_obj(ciMethod* callee, BasicType t, bool is_volatile) {
- if (InlineUnsafeOps) {
- Values* args = state()->pop_arguments(callee->arg_size());
- null_check(args->at(0));
- Instruction* offset = args->at(2);
+void GraphBuilder::append_unsafe_put_obj(ciMethod* callee, BasicType t, bool is_volatile) {
+ Values* args = state()->pop_arguments(callee->arg_size());
+ null_check(args->at(0));
+ Instruction* offset = args->at(2);
#ifndef _LP64
- offset = append(new Convert(Bytecodes::_l2i, offset, as_ValueType(T_INT)));
+ offset = append(new Convert(Bytecodes::_l2i, offset, as_ValueType(T_INT)));
#endif
- Instruction* op = append(new UnsafePutObject(t, args->at(1), offset, args->at(3), is_volatile));
- compilation()->set_has_unsafe_access(true);
- kill_all();
- }
- return InlineUnsafeOps;
+ Instruction* op = append(new UnsafePutObject(t, args->at(1), offset, args->at(3), is_volatile));
+ compilation()->set_has_unsafe_access(true);
+ kill_all();
}
-bool GraphBuilder::append_unsafe_get_raw(ciMethod* callee, BasicType t) {
- if (InlineUnsafeOps) {
- Values* args = state()->pop_arguments(callee->arg_size());
- null_check(args->at(0));
- Instruction* op = append(new UnsafeGetRaw(t, args->at(1), false));
- push(op->type(), op);
- compilation()->set_has_unsafe_access(true);
- }
- return InlineUnsafeOps;
+void GraphBuilder::append_unsafe_get_raw(ciMethod* callee, BasicType t) {
+ Values* args = state()->pop_arguments(callee->arg_size());
+ null_check(args->at(0));
+ Instruction* op = append(new UnsafeGetRaw(t, args->at(1), false));
+ push(op->type(), op);
+ compilation()->set_has_unsafe_access(true);
}
-bool GraphBuilder::append_unsafe_put_raw(ciMethod* callee, BasicType t) {
- if (InlineUnsafeOps) {
- Values* args = state()->pop_arguments(callee->arg_size());
- null_check(args->at(0));
- Instruction* op = append(new UnsafePutRaw(t, args->at(1), args->at(2)));
- compilation()->set_has_unsafe_access(true);
- }
- return InlineUnsafeOps;
+void GraphBuilder::append_unsafe_put_raw(ciMethod* callee, BasicType t) {
+ Values* args = state()->pop_arguments(callee->arg_size());
+ null_check(args->at(0));
+ Instruction* op = append(new UnsafePutRaw(t, args->at(1), args->at(2)));
+ compilation()->set_has_unsafe_access(true);
}
@@ -4352,21 +4210,18 @@
}
}
-bool GraphBuilder::append_unsafe_get_and_set_obj(ciMethod* callee, bool is_add) {
- if (InlineUnsafeOps) {
- Values* args = state()->pop_arguments(callee->arg_size());
- BasicType t = callee->return_type()->basic_type();
- null_check(args->at(0));
- Instruction* offset = args->at(2);
+void GraphBuilder::append_unsafe_get_and_set_obj(ciMethod* callee, bool is_add) {
+ Values* args = state()->pop_arguments(callee->arg_size());
+ BasicType t = callee->return_type()->basic_type();
+ null_check(args->at(0));
+ Instruction* offset = args->at(2);
#ifndef _LP64
- offset = append(new Convert(Bytecodes::_l2i, offset, as_ValueType(T_INT)));
+ offset = append(new Convert(Bytecodes::_l2i, offset, as_ValueType(T_INT)));
#endif
- Instruction* op = append(new UnsafeGetAndSetObject(t, args->at(1), offset, args->at(3), is_add));
- compilation()->set_has_unsafe_access(true);
- kill_all();
- push(op->type(), op);
- }
- return InlineUnsafeOps;
+ Instruction* op = append(new UnsafeGetAndSetObject(t, args->at(1), offset, args->at(3), is_add));
+ compilation()->set_has_unsafe_access(true);
+ kill_all();
+ push(op->type(), op);
}
#ifndef PRODUCT
--- a/hotspot/src/share/vm/c1/c1_GraphBuilder.hpp Mon Jul 27 13:56:26 2015 -0700
+++ b/hotspot/src/share/vm/c1/c1_GraphBuilder.hpp Fri Jul 31 12:13:57 2015 +0200
@@ -339,6 +339,8 @@
void inline_sync_entry(Value lock, BlockBegin* sync_handler);
void fill_sync_handler(Value lock, BlockBegin* sync_handler, bool default_handler = false);
+ void build_graph_for_intrinsic(ciMethod* callee);
+
// inliners
bool try_inline( ciMethod* callee, bool holder_known, Bytecodes::Code bc = Bytecodes::_illegal, Value receiver = NULL);
bool try_inline_intrinsics(ciMethod* callee);
@@ -364,12 +366,12 @@
void pop_scope();
void pop_scope_for_jsr();
- bool append_unsafe_get_obj(ciMethod* callee, BasicType t, bool is_volatile);
- bool append_unsafe_put_obj(ciMethod* callee, BasicType t, bool is_volatile);
- bool append_unsafe_get_raw(ciMethod* callee, BasicType t);
- bool append_unsafe_put_raw(ciMethod* callee, BasicType t);
+ void append_unsafe_get_obj(ciMethod* callee, BasicType t, bool is_volatile);
+ void append_unsafe_put_obj(ciMethod* callee, BasicType t, bool is_volatile);
+ void append_unsafe_get_raw(ciMethod* callee, BasicType t);
+ void append_unsafe_put_raw(ciMethod* callee, BasicType t);
void append_unsafe_CAS(ciMethod* callee);
- bool append_unsafe_get_and_set_obj(ciMethod* callee, bool is_add);
+ void append_unsafe_get_and_set_obj(ciMethod* callee, bool is_add);
void print_inlining(ciMethod* callee, const char* msg = NULL, bool success = true);
--- a/hotspot/src/share/vm/c1/c1_ValueType.cpp Mon Jul 27 13:56:26 2015 -0700
+++ b/hotspot/src/share/vm/c1/c1_ValueType.cpp Fri Jul 31 12:13:57 2015 +0200
@@ -153,7 +153,19 @@
case T_FLOAT : return new FloatConstant (value.as_float ());
case T_DOUBLE : return new DoubleConstant(value.as_double());
case T_ARRAY : // fall through (ciConstant doesn't have an array accessor)
- case T_OBJECT : return new ObjectConstant(value.as_object());
+ case T_OBJECT : {
+ // TODO: Common the code with GraphBuilder::load_constant?
+ ciObject* obj = value.as_object();
+ if (obj->is_null_object())
+ return objectNull;
+ if (obj->is_loaded()) {
+ if (obj->is_array())
+ return new ArrayConstant(obj->as_array());
+ else if (obj->is_instance())
+ return new InstanceConstant(obj->as_instance());
+ }
+ return new ObjectConstant(obj);
+ }
}
ShouldNotReachHere();
return illegalType;
--- a/hotspot/src/share/vm/classfile/vmSymbols.cpp Mon Jul 27 13:56:26 2015 -0700
+++ b/hotspot/src/share/vm/classfile/vmSymbols.cpp Fri Jul 31 12:13:57 2015 +0200
@@ -324,6 +324,319 @@
return vmIntrinsics::_none;
}
+bool vmIntrinsics::preserves_state(vmIntrinsics::ID id) {
+ assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
+ switch(id) {
+#ifdef TRACE_HAVE_INTRINSICS
+ case vmIntrinsics::_classID:
+ case vmIntrinsics::_threadID:
+ case vmIntrinsics::_counterTime:
+#endif
+ case vmIntrinsics::_currentTimeMillis:
+ case vmIntrinsics::_nanoTime:
+ case vmIntrinsics::_floatToRawIntBits:
+ case vmIntrinsics::_intBitsToFloat:
+ case vmIntrinsics::_doubleToRawLongBits:
+ case vmIntrinsics::_longBitsToDouble:
+ case vmIntrinsics::_getClass:
+ case vmIntrinsics::_isInstance:
+ case vmIntrinsics::_currentThread:
+ case vmIntrinsics::_dabs:
+ case vmIntrinsics::_dsqrt:
+ case vmIntrinsics::_dsin:
+ case vmIntrinsics::_dcos:
+ case vmIntrinsics::_dtan:
+ case vmIntrinsics::_dlog:
+ case vmIntrinsics::_dlog10:
+ case vmIntrinsics::_dexp:
+ case vmIntrinsics::_dpow:
+ case vmIntrinsics::_checkIndex:
+ case vmIntrinsics::_Reference_get:
+ case vmIntrinsics::_updateCRC32:
+ case vmIntrinsics::_updateBytesCRC32:
+ case vmIntrinsics::_updateByteBufferCRC32:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool vmIntrinsics::can_trap(vmIntrinsics::ID id) {
+ assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
+ switch(id) {
+#ifdef TRACE_HAVE_INTRINSICS
+ case vmIntrinsics::_counterTime:
+#endif
+ case vmIntrinsics::_currentTimeMillis:
+ case vmIntrinsics::_nanoTime:
+ case vmIntrinsics::_floatToRawIntBits:
+ case vmIntrinsics::_intBitsToFloat:
+ case vmIntrinsics::_doubleToRawLongBits:
+ case vmIntrinsics::_longBitsToDouble:
+ case vmIntrinsics::_currentThread:
+ case vmIntrinsics::_dabs:
+ case vmIntrinsics::_dsqrt:
+ case vmIntrinsics::_dsin:
+ case vmIntrinsics::_dcos:
+ case vmIntrinsics::_dtan:
+ case vmIntrinsics::_dlog:
+ case vmIntrinsics::_dlog10:
+ case vmIntrinsics::_dexp:
+ case vmIntrinsics::_dpow:
+ case vmIntrinsics::_updateCRC32:
+ case vmIntrinsics::_updateBytesCRC32:
+ case vmIntrinsics::_updateByteBufferCRC32:
+ return false;
+ default:
+ return true;
+ }
+}
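+
+// Example (taken from the c1_GraphBuilder.cpp change in this patch): the two
+// queries above replace the per-intrinsic 'preserves_state'/'cantrap' locals
+// when the C1 IR node is constructed,
+//
+//   Intrinsic* result = new Intrinsic(result_type, callee->intrinsic_id(),
+//                                     args, has_receiver, state_before,
+//                                     vmIntrinsics::preserves_state(id),
+//                                     vmIntrinsics::can_trap(id));
+//
+// so C1 no longer keeps a separate switch for these properties.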
+
+bool vmIntrinsics::does_virtual_dispatch(vmIntrinsics::ID id) {
+ assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
+ switch(id) {
+ case vmIntrinsics::_hashCode:
+ case vmIntrinsics::_clone:
+ return true;
+ default:
+ return false;
+ }
+}
+
+int vmIntrinsics::predicates_needed(vmIntrinsics::ID id) {
+ assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
+ switch (id) {
+ case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
+ case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
+ return 1;
+ case vmIntrinsics::_digestBase_implCompressMB:
+ return 3;
+ default:
+ return 0;
+ }
+}
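+
+// Illustrative sketch only (assumed consumer, not visible in this hunk): C2's
+// Compile::make_vm_intrinsic() is the intended user of the two queries above,
+// roughly along the lines of
+//
+//   return new LibraryIntrinsic(m, is_virtual,
+//                               vmIntrinsics::predicates_needed(id),
+//                               vmIntrinsics::does_virtual_dispatch(id),
+//                               id);
+//
+// so predicate counts and dispatch kinds are no longer hard-coded per
+// intrinsic at the call site.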
+
+bool vmIntrinsics::is_disabled_by_flags(vmIntrinsics::ID id) {
+ assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
+ switch (id) {
+ case vmIntrinsics::_isInstance:
+ case vmIntrinsics::_isAssignableFrom:
+ case vmIntrinsics::_getModifiers:
+ case vmIntrinsics::_isInterface:
+ case vmIntrinsics::_isArray:
+ case vmIntrinsics::_isPrimitive:
+ case vmIntrinsics::_getSuperclass:
+ case vmIntrinsics::_Class_cast:
+ case vmIntrinsics::_getLength:
+ case vmIntrinsics::_newArray:
+ if (!InlineClassNatives) return true;
+ break;
+ case vmIntrinsics::_currentThread:
+ case vmIntrinsics::_isInterrupted:
+ if (!InlineThreadNatives) return true;
+ break;
+ case vmIntrinsics::_floatToRawIntBits:
+ case vmIntrinsics::_intBitsToFloat:
+ case vmIntrinsics::_doubleToRawLongBits:
+ case vmIntrinsics::_longBitsToDouble:
+ case vmIntrinsics::_dabs:
+ case vmIntrinsics::_dsqrt:
+ case vmIntrinsics::_dsin:
+ case vmIntrinsics::_dcos:
+ case vmIntrinsics::_dtan:
+ case vmIntrinsics::_dlog:
+ case vmIntrinsics::_dexp:
+ case vmIntrinsics::_dpow:
+ case vmIntrinsics::_dlog10:
+ case vmIntrinsics::_datan2:
+ case vmIntrinsics::_min:
+ case vmIntrinsics::_max:
+ case vmIntrinsics::_floatToIntBits:
+ case vmIntrinsics::_doubleToLongBits:
+ if (!InlineMathNatives) return true;
+ break;
+ case vmIntrinsics::_arraycopy:
+ if (!InlineArrayCopy) return true;
+ break;
+ case vmIntrinsics::_updateCRC32:
+ case vmIntrinsics::_updateBytesCRC32:
+ case vmIntrinsics::_updateByteBufferCRC32:
+ if (!UseCRC32Intrinsics) return true;
+ break;
+ case vmIntrinsics::_getObject:
+ case vmIntrinsics::_getBoolean:
+ case vmIntrinsics::_getByte:
+ case vmIntrinsics::_getShort:
+ case vmIntrinsics::_getChar:
+ case vmIntrinsics::_getInt:
+ case vmIntrinsics::_getLong:
+ case vmIntrinsics::_getFloat:
+ case vmIntrinsics::_getDouble:
+ case vmIntrinsics::_putObject:
+ case vmIntrinsics::_putBoolean:
+ case vmIntrinsics::_putByte:
+ case vmIntrinsics::_putShort:
+ case vmIntrinsics::_putChar:
+ case vmIntrinsics::_putInt:
+ case vmIntrinsics::_putLong:
+ case vmIntrinsics::_putFloat:
+ case vmIntrinsics::_putDouble:
+ case vmIntrinsics::_getObjectVolatile:
+ case vmIntrinsics::_getBooleanVolatile:
+ case vmIntrinsics::_getByteVolatile:
+ case vmIntrinsics::_getShortVolatile:
+ case vmIntrinsics::_getCharVolatile:
+ case vmIntrinsics::_getIntVolatile:
+ case vmIntrinsics::_getLongVolatile:
+ case vmIntrinsics::_getFloatVolatile:
+ case vmIntrinsics::_getDoubleVolatile:
+ case vmIntrinsics::_putObjectVolatile:
+ case vmIntrinsics::_putBooleanVolatile:
+ case vmIntrinsics::_putByteVolatile:
+ case vmIntrinsics::_putShortVolatile:
+ case vmIntrinsics::_putCharVolatile:
+ case vmIntrinsics::_putIntVolatile:
+ case vmIntrinsics::_putLongVolatile:
+ case vmIntrinsics::_putFloatVolatile:
+ case vmIntrinsics::_putDoubleVolatile:
+ case vmIntrinsics::_getByte_raw:
+ case vmIntrinsics::_getShort_raw:
+ case vmIntrinsics::_getChar_raw:
+ case vmIntrinsics::_getInt_raw:
+ case vmIntrinsics::_getLong_raw:
+ case vmIntrinsics::_getFloat_raw:
+ case vmIntrinsics::_getDouble_raw:
+ case vmIntrinsics::_putByte_raw:
+ case vmIntrinsics::_putShort_raw:
+ case vmIntrinsics::_putChar_raw:
+ case vmIntrinsics::_putInt_raw:
+ case vmIntrinsics::_putLong_raw:
+ case vmIntrinsics::_putFloat_raw:
+ case vmIntrinsics::_putDouble_raw:
+ case vmIntrinsics::_putOrderedObject:
+ case vmIntrinsics::_putOrderedLong:
+ case vmIntrinsics::_putOrderedInt:
+ case vmIntrinsics::_getAndAddInt:
+ case vmIntrinsics::_getAndAddLong:
+ case vmIntrinsics::_getAndSetInt:
+ case vmIntrinsics::_getAndSetLong:
+ case vmIntrinsics::_getAndSetObject:
+ if (!InlineUnsafeOps) return true;
+ break;
+ case vmIntrinsics::_getShortUnaligned:
+ case vmIntrinsics::_getCharUnaligned:
+ case vmIntrinsics::_getIntUnaligned:
+ case vmIntrinsics::_getLongUnaligned:
+ case vmIntrinsics::_putShortUnaligned:
+ case vmIntrinsics::_putCharUnaligned:
+ case vmIntrinsics::_putIntUnaligned:
+ case vmIntrinsics::_putLongUnaligned:
+ case vmIntrinsics::_allocateInstance:
+ case vmIntrinsics::_getAddress_raw:
+ case vmIntrinsics::_putAddress_raw:
+ if (!InlineUnsafeOps || !UseUnalignedAccesses) return true;
+ break;
+ case vmIntrinsics::_hashCode:
+ if (!InlineObjectHash) return true;
+ break;
+ case vmIntrinsics::_aescrypt_encryptBlock:
+ case vmIntrinsics::_aescrypt_decryptBlock:
+ if (!UseAESIntrinsics) return true;
+ break;
+ case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
+ case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
+ if (!UseAESIntrinsics) return true;
+ break;
+ case vmIntrinsics::_sha_implCompress:
+ if (!UseSHA1Intrinsics) return true;
+ break;
+ case vmIntrinsics::_sha2_implCompress:
+ if (!UseSHA256Intrinsics) return true;
+ break;
+ case vmIntrinsics::_sha5_implCompress:
+ if (!UseSHA512Intrinsics) return true;
+ break;
+ case vmIntrinsics::_digestBase_implCompressMB:
+ if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) return true;
+ break;
+ case vmIntrinsics::_ghash_processBlocks:
+ if (!UseGHASHIntrinsics) return true;
+ break;
+ case vmIntrinsics::_updateBytesCRC32C:
+ case vmIntrinsics::_updateDirectByteBufferCRC32C:
+ if (!UseCRC32CIntrinsics) return true;
+ break;
+ case vmIntrinsics::_copyMemory:
+ if (!InlineArrayCopy || !InlineUnsafeOps) return true;
+ break;
+#ifdef COMPILER1
+ case vmIntrinsics::_checkIndex:
+ if (!InlineNIOCheckIndex) return true;
+ break;
+#endif // COMPILER1
+#ifdef COMPILER2
+ case vmIntrinsics::_clone:
+ case vmIntrinsics::_copyOf:
+ case vmIntrinsics::_copyOfRange:
+ // These intrinsics use both the objectcopy and the arraycopy
+ // intrinsic mechanism.
+ if (!InlineObjectCopy || !InlineArrayCopy) return true;
+ break;
+ case vmIntrinsics::_compareTo:
+ if (!SpecialStringCompareTo) return true;
+ break;
+ case vmIntrinsics::_indexOf:
+ if (!SpecialStringIndexOf) return true;
+ break;
+ case vmIntrinsics::_equals:
+ if (!SpecialStringEquals) return true;
+ break;
+ case vmIntrinsics::_equalsC:
+ if (!SpecialArraysEquals) return true;
+ break;
+ case vmIntrinsics::_encodeISOArray:
+ if (!SpecialEncodeISOArray) return true;
+ break;
+ case vmIntrinsics::_getCallerClass:
+ if (!InlineReflectionGetCallerClass) return true;
+ break;
+ case vmIntrinsics::_multiplyToLen:
+ if (!UseMultiplyToLenIntrinsic) return true;
+ break;
+ case vmIntrinsics::_squareToLen:
+ if (!UseSquareToLenIntrinsic) return true;
+ break;
+ case vmIntrinsics::_mulAdd:
+ if (!UseMulAddIntrinsic) return true;
+ break;
+ case vmIntrinsics::_montgomeryMultiply:
+ if (!UseMontgomeryMultiplyIntrinsic) return true;
+ break;
+ case vmIntrinsics::_montgomerySquare:
+ if (!UseMontgomerySquareIntrinsic) return true;
+ break;
+ case vmIntrinsics::_addExactI:
+ case vmIntrinsics::_addExactL:
+ case vmIntrinsics::_decrementExactI:
+ case vmIntrinsics::_decrementExactL:
+ case vmIntrinsics::_incrementExactI:
+ case vmIntrinsics::_incrementExactL:
+ case vmIntrinsics::_multiplyExactI:
+ case vmIntrinsics::_multiplyExactL:
+ case vmIntrinsics::_negateExactI:
+ case vmIntrinsics::_negateExactL:
+ case vmIntrinsics::_subtractExactI:
+ case vmIntrinsics::_subtractExactL:
+ if (!UseMathExactIntrinsics || !InlineMathNatives) return true;
+ break;
+#endif // COMPILER2
+ default:
+ return false;
+ }
+
+ return false;
+}
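+
+// Worked example (comment only): with -XX:-InlineUnsafeOps,
+//
+//   vmIntrinsics::is_disabled_by_flags(vmIntrinsics::_getInt)  // returns true
+//
+// so neither C1 nor C2 intrinsifies Unsafe.getInt(). Compiler-specific flag
+// handling (e.g. InlineNatives, or DisableIntrinsic in C2) is layered on top
+// of this common check in the respective is_intrinsic_disabled_by_flag
+// implementations.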
#define VM_INTRINSIC_INITIALIZE(id, klass, name, sig, flags) #id "\0"
static const char* vm_intrinsic_name_bodies =
--- a/hotspot/src/share/vm/classfile/vmSymbols.hpp Mon Jul 27 13:56:26 2015 -0700
+++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp Fri Jul 31 12:13:57 2015 +0200
@@ -1368,6 +1368,26 @@
// Raw conversion:
static ID for_raw_conversion(BasicType src, BasicType dest);
+
+ // The methods below provide information related to compiling intrinsics.
+
+ // (1) Information needed by the C1 compiler.
+
+ static bool preserves_state(vmIntrinsics::ID id);
+ static bool can_trap(vmIntrinsics::ID id);
+
+ // (2) Information needed by the C2 compiler.
+
+ // Returns true if the intrinsic with ID 'id' performs a virtual dispatch.
+ static bool does_virtual_dispatch(vmIntrinsics::ID id);
+ // A return value larger than 0 indicates that the intrinsic with ID 'id'
+ // requires predicated logic.
+ static int predicates_needed(vmIntrinsics::ID id);
+
+ // Returns true if an intrinsic is disabled by command-line flags and
+ // false otherwise. Implements functionality common to the C1
+ // and the C2 compiler.
+ static bool is_disabled_by_flags(vmIntrinsics::ID id);
};
#endif // SHARE_VM_CLASSFILE_VMSYMBOLS_HPP
--- a/hotspot/src/share/vm/compiler/abstractCompiler.hpp Mon Jul 27 13:56:26 2015 -0700
+++ b/hotspot/src/share/vm/compiler/abstractCompiler.hpp Fri Jul 31 12:13:57 2015 +0200
@@ -66,6 +66,58 @@
virtual bool supports_osr () { return true; }
virtual bool can_compile_method(methodHandle method) { return true; }
+ // Determine if the current compiler provides an intrinsic
+ // for method 'method'. An intrinsic is available if:
+ // - the intrinsic is enabled (by using the appropriate command-line flag) and
+ // - the platform on which the VM is running supports the intrinsic
+ // (i.e., the platform provides the instructions necessary for the compiler
+ // to generate the intrinsic code).
+ //
+ // The second parameter, 'compilation_context', is needed to implement functionality
+ // related to the DisableIntrinsic command-line flag. The DisableIntrinsic flag can
+ // be used to prohibit the C2 compiler (but not the C1 compiler) from using an intrinsic.
+ // There are three ways to disable an intrinsic using the DisableIntrinsic flag:
+ //
+ // (1) -XX:DisableIntrinsic=_hashCode,_getClass
+ // Disables intrinsification of _hashCode and _getClass globally
+ // (i.e., the intrinsified versions of the methods will not be used at all).
+ // (2) -XX:CompileCommand=option,aClass::aMethod,ccstr,DisableIntrinsic,_hashCode
+ // Disables intrinsification of _hashCode if it is called from
+ // aClass::aMethod (but not for any other call site of _hashCode)
+ // (3) -XX:CompileCommand=option,java.lang.ref.Reference::get,ccstr,DisableIntrinsic,_Reference_get
+ // Some methods are not compiled by C2. Instead, the C2 compiler
+ // directly returns the intrinsified version of these methods.
+ // The command above forces C2 to compile _Reference_get, but
+ // allows using the intrinsified version of _Reference_get at all
+ // other call sites.
+ //
+ // Of the modes above, (1) disables intrinsics globally, whereas (2) and (3)
+ // disable intrinsics on a per-method basis. In cases (2) and (3) the
+ // compilation context is aClass::aMethod and java.lang.ref.Reference::get,
+ // respectively.
+ virtual bool is_intrinsic_available(methodHandle method, methodHandle compilation_context) {
+ return false;
+ }
+
+ // Determines if an intrinsic is supported by the compiler, that is,
+ // the compiler provides the instructions necessary to generate
+ // the intrinsic code for method 'method'.
+ //
+ // The 'is_intrinsic_supported' method acts as a white list, that is,
+ // by default no intrinsics are supported by a compiler except for those
+ // explicitly listed by the method. Overriding methods should conform
+ // to this behavior.
+ virtual bool is_intrinsic_supported(methodHandle method) {
+ return false;
+ }
+
+ // Implements compiler-specific processing of command-line flags.
+ // Processing of command-line flags common to all compilers is implemented
+ // in vmIntrinsics::is_disabled_by_flags().
+ virtual bool is_intrinsic_disabled_by_flag(methodHandle method) {
+ return false;
+ }
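+
+ // Illustrative sketch only ('MyCompiler' is a hypothetical subclass, not part
+ // of this change): a concrete compiler is expected to combine the queries
+ // above roughly as
+ //
+ //   bool MyCompiler::is_intrinsic_available(methodHandle method,
+ //                                           methodHandle compilation_context) {
+ //     return is_intrinsic_supported(method) &&
+ //            !is_intrinsic_disabled_by_flag(method);
+ //   }
+ //
+ // C1 ignores the compilation context entirely, while C2 forwards it to its
+ // DisableIntrinsic handling; see the C1 and C2 overrides in this change.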
+
// Compiler type queries.
bool is_c1() { return _type == c1; }
bool is_c2() { return _type == c2; }
--- a/hotspot/src/share/vm/opto/c2compiler.cpp Mon Jul 27 13:56:26 2015 -0700
+++ b/hotspot/src/share/vm/opto/c2compiler.cpp Fri Jul 31 12:13:57 2015 +0200
@@ -79,7 +79,6 @@
return OptoRuntime::generate(thread->env());
}
-
void C2Compiler::initialize() {
// The first compiler thread that gets here will initialize the
// small amount of global state (and runtime stubs) that C2 needs.
@@ -154,11 +153,361 @@
}
}
-
void C2Compiler::print_timers() {
Compile::print_timers();
}
+bool C2Compiler::is_intrinsic_available(methodHandle method, methodHandle compilation_context) {
+ // Assume a non-virtual dispatch. A virtual dispatch is
+ // possible for only a limited set of available intrinsics whereas
+ // a non-virtual dispatch is possible for all available intrinsics.
+ return is_intrinsic_supported(method, false) &&
+ !is_intrinsic_disabled_by_flag(method, compilation_context);
+}
+
+bool C2Compiler::is_intrinsic_supported(methodHandle method, bool is_virtual) {
+ vmIntrinsics::ID id = method->intrinsic_id();
+ assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
+
+ if (id < vmIntrinsics::FIRST_ID || id >= vmIntrinsics::LAST_COMPILER_INLINE) {
+ return false;
+ }
+
+ // Only the Object.hashCode and Object.clone intrinsics also implement a
+ // virtual dispatch, because these calls are expensive and both methods are
+ // frequently overridden. All other intrinsics implement only a non-virtual
+ // dispatch.
+ if (is_virtual) {
+ switch (id) {
+ case vmIntrinsics::_hashCode:
+ case vmIntrinsics::_clone:
+ break;
+ default:
+ return false;
+ }
+ }
+
+ switch (id) {
+ case vmIntrinsics::_compareTo:
+ if (!Matcher::match_rule_supported(Op_StrComp)) return false;
+ break;
+ case vmIntrinsics::_equals:
+ if (!Matcher::match_rule_supported(Op_StrEquals)) return false;
+ break;
+ case vmIntrinsics::_equalsC:
+ if (!Matcher::match_rule_supported(Op_AryEq)) return false;
+ break;
+ case vmIntrinsics::_copyMemory:
+ if (StubRoutines::unsafe_arraycopy() == NULL) return false;
+ break;
+ case vmIntrinsics::_encodeISOArray:
+ if (!Matcher::match_rule_supported(Op_EncodeISOArray)) return false;
+ break;
+ case vmIntrinsics::_bitCount_i:
+ if (!Matcher::match_rule_supported(Op_PopCountI)) return false;
+ break;
+ case vmIntrinsics::_bitCount_l:
+ if (!Matcher::match_rule_supported(Op_PopCountL)) return false;
+ break;
+ case vmIntrinsics::_numberOfLeadingZeros_i:
+ if (!Matcher::match_rule_supported(Op_CountLeadingZerosI)) return false;
+ break;
+ case vmIntrinsics::_numberOfLeadingZeros_l:
+ if (!Matcher::match_rule_supported(Op_CountLeadingZerosL)) return false;
+ break;
+ case vmIntrinsics::_numberOfTrailingZeros_i:
+ if (!Matcher::match_rule_supported(Op_CountTrailingZerosI)) return false;
+ break;
+ case vmIntrinsics::_numberOfTrailingZeros_l:
+ if (!Matcher::match_rule_supported(Op_CountTrailingZerosL)) return false;
+ break;
+ case vmIntrinsics::_reverseBytes_c:
+ if (!Matcher::match_rule_supported(Op_ReverseBytesUS)) return false;
+ break;
+ case vmIntrinsics::_reverseBytes_s:
+ if (!Matcher::match_rule_supported(Op_ReverseBytesS)) return false;
+ break;
+ case vmIntrinsics::_reverseBytes_i:
+ if (!Matcher::match_rule_supported(Op_ReverseBytesI)) return false;
+ break;
+ case vmIntrinsics::_reverseBytes_l:
+ if (!Matcher::match_rule_supported(Op_ReverseBytesL)) return false;
+ break;
+ case vmIntrinsics::_compareAndSwapObject:
+#ifdef _LP64
+ if (!UseCompressedOops && !Matcher::match_rule_supported(Op_CompareAndSwapP)) return false;
+#endif
+ break;
+ case vmIntrinsics::_compareAndSwapLong:
+ if (!Matcher::match_rule_supported(Op_CompareAndSwapL)) return false;
+ break;
+ case vmIntrinsics::_getAndAddInt:
+ if (!Matcher::match_rule_supported(Op_GetAndAddI)) return false;
+ break;
+ case vmIntrinsics::_getAndAddLong:
+ if (!Matcher::match_rule_supported(Op_GetAndAddL)) return false;
+ break;
+ case vmIntrinsics::_getAndSetInt:
+ if (!Matcher::match_rule_supported(Op_GetAndSetI)) return false;
+ break;
+ case vmIntrinsics::_getAndSetLong:
+ if (!Matcher::match_rule_supported(Op_GetAndSetL)) return false;
+ break;
+ case vmIntrinsics::_getAndSetObject:
+#ifdef _LP64
+ if (!UseCompressedOops && !Matcher::match_rule_supported(Op_GetAndSetP)) return false;
+ if (UseCompressedOops && !Matcher::match_rule_supported(Op_GetAndSetN)) return false;
+ break;
+#else
+ if (!Matcher::match_rule_supported(Op_GetAndSetP)) return false;
+ break;
+#endif
+ case vmIntrinsics::_incrementExactI:
+ case vmIntrinsics::_addExactI:
+ if (!Matcher::match_rule_supported(Op_OverflowAddI)) return false;
+ break;
+ case vmIntrinsics::_incrementExactL:
+ case vmIntrinsics::_addExactL:
+ if (!Matcher::match_rule_supported(Op_OverflowAddL)) return false;
+ break;
+ case vmIntrinsics::_decrementExactI:
+ case vmIntrinsics::_subtractExactI:
+ if (!Matcher::match_rule_supported(Op_OverflowSubI)) return false;
+ break;
+ case vmIntrinsics::_decrementExactL:
+ case vmIntrinsics::_subtractExactL:
+ if (!Matcher::match_rule_supported(Op_OverflowSubL)) return false;
+ break;
+ case vmIntrinsics::_negateExactI:
+ if (!Matcher::match_rule_supported(Op_OverflowSubI)) return false;
+ break;
+ case vmIntrinsics::_negateExactL:
+ if (!Matcher::match_rule_supported(Op_OverflowSubL)) return false;
+ break;
+ case vmIntrinsics::_multiplyExactI:
+ if (!Matcher::match_rule_supported(Op_OverflowMulI)) return false;
+ break;
+ case vmIntrinsics::_multiplyExactL:
+ if (!Matcher::match_rule_supported(Op_OverflowMulL)) return false;
+ break;
+ case vmIntrinsics::_getCallerClass:
+ if (SystemDictionary::reflect_CallerSensitive_klass() == NULL) return false;
+ break;
+ case vmIntrinsics::_hashCode:
+ case vmIntrinsics::_identityHashCode:
+ case vmIntrinsics::_getClass:
+ case vmIntrinsics::_dsin:
+ case vmIntrinsics::_dcos:
+ case vmIntrinsics::_dtan:
+ case vmIntrinsics::_dabs:
+ case vmIntrinsics::_datan2:
+ case vmIntrinsics::_dsqrt:
+ case vmIntrinsics::_dexp:
+ case vmIntrinsics::_dlog:
+ case vmIntrinsics::_dlog10:
+ case vmIntrinsics::_dpow:
+ case vmIntrinsics::_min:
+ case vmIntrinsics::_max:
+ case vmIntrinsics::_arraycopy:
+ case vmIntrinsics::_indexOf:
+ case vmIntrinsics::_getObject:
+ case vmIntrinsics::_getBoolean:
+ case vmIntrinsics::_getByte:
+ case vmIntrinsics::_getShort:
+ case vmIntrinsics::_getChar:
+ case vmIntrinsics::_getInt:
+ case vmIntrinsics::_getLong:
+ case vmIntrinsics::_getFloat:
+ case vmIntrinsics::_getDouble:
+ case vmIntrinsics::_putObject:
+ case vmIntrinsics::_putBoolean:
+ case vmIntrinsics::_putByte:
+ case vmIntrinsics::_putShort:
+ case vmIntrinsics::_putChar:
+ case vmIntrinsics::_putInt:
+ case vmIntrinsics::_putLong:
+ case vmIntrinsics::_putFloat:
+ case vmIntrinsics::_putDouble:
+ case vmIntrinsics::_getByte_raw:
+ case vmIntrinsics::_getShort_raw:
+ case vmIntrinsics::_getChar_raw:
+ case vmIntrinsics::_getInt_raw:
+ case vmIntrinsics::_getLong_raw:
+ case vmIntrinsics::_getFloat_raw:
+ case vmIntrinsics::_getDouble_raw:
+ case vmIntrinsics::_getAddress_raw:
+ case vmIntrinsics::_putByte_raw:
+ case vmIntrinsics::_putShort_raw:
+ case vmIntrinsics::_putChar_raw:
+ case vmIntrinsics::_putInt_raw:
+ case vmIntrinsics::_putLong_raw:
+ case vmIntrinsics::_putFloat_raw:
+ case vmIntrinsics::_putDouble_raw:
+ case vmIntrinsics::_putAddress_raw:
+ case vmIntrinsics::_getObjectVolatile:
+ case vmIntrinsics::_getBooleanVolatile:
+ case vmIntrinsics::_getByteVolatile:
+ case vmIntrinsics::_getShortVolatile:
+ case vmIntrinsics::_getCharVolatile:
+ case vmIntrinsics::_getIntVolatile:
+ case vmIntrinsics::_getLongVolatile:
+ case vmIntrinsics::_getFloatVolatile:
+ case vmIntrinsics::_getDoubleVolatile:
+ case vmIntrinsics::_putObjectVolatile:
+ case vmIntrinsics::_putBooleanVolatile:
+ case vmIntrinsics::_putByteVolatile:
+ case vmIntrinsics::_putShortVolatile:
+ case vmIntrinsics::_putCharVolatile:
+ case vmIntrinsics::_putIntVolatile:
+ case vmIntrinsics::_putLongVolatile:
+ case vmIntrinsics::_putFloatVolatile:
+ case vmIntrinsics::_putDoubleVolatile:
+ case vmIntrinsics::_getShortUnaligned:
+ case vmIntrinsics::_getCharUnaligned:
+ case vmIntrinsics::_getIntUnaligned:
+ case vmIntrinsics::_getLongUnaligned:
+ case vmIntrinsics::_putShortUnaligned:
+ case vmIntrinsics::_putCharUnaligned:
+ case vmIntrinsics::_putIntUnaligned:
+ case vmIntrinsics::_putLongUnaligned:
+ case vmIntrinsics::_compareAndSwapInt:
+ case vmIntrinsics::_putOrderedObject:
+ case vmIntrinsics::_putOrderedInt:
+ case vmIntrinsics::_putOrderedLong:
+ case vmIntrinsics::_loadFence:
+ case vmIntrinsics::_storeFence:
+ case vmIntrinsics::_fullFence:
+ case vmIntrinsics::_currentThread:
+ case vmIntrinsics::_isInterrupted:
+#ifdef TRACE_HAVE_INTRINSICS
+ case vmIntrinsics::_classID:
+ case vmIntrinsics::_threadID:
+ case vmIntrinsics::_counterTime:
+#endif
+ case vmIntrinsics::_currentTimeMillis:
+ case vmIntrinsics::_nanoTime:
+ case vmIntrinsics::_allocateInstance:
+ case vmIntrinsics::_newArray:
+ case vmIntrinsics::_getLength:
+ case vmIntrinsics::_copyOf:
+ case vmIntrinsics::_copyOfRange:
+ case vmIntrinsics::_clone:
+ case vmIntrinsics::_isAssignableFrom:
+ case vmIntrinsics::_isInstance:
+ case vmIntrinsics::_getModifiers:
+ case vmIntrinsics::_isInterface:
+ case vmIntrinsics::_isArray:
+ case vmIntrinsics::_isPrimitive:
+ case vmIntrinsics::_getSuperclass:
+ case vmIntrinsics::_getClassAccessFlags:
+ case vmIntrinsics::_floatToRawIntBits:
+ case vmIntrinsics::_floatToIntBits:
+ case vmIntrinsics::_intBitsToFloat:
+ case vmIntrinsics::_doubleToRawLongBits:
+ case vmIntrinsics::_doubleToLongBits:
+ case vmIntrinsics::_longBitsToDouble:
+ case vmIntrinsics::_Reference_get:
+ case vmIntrinsics::_Class_cast:
+ case vmIntrinsics::_aescrypt_encryptBlock:
+ case vmIntrinsics::_aescrypt_decryptBlock:
+ case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
+ case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
+ case vmIntrinsics::_sha_implCompress:
+ case vmIntrinsics::_sha2_implCompress:
+ case vmIntrinsics::_sha5_implCompress:
+ case vmIntrinsics::_digestBase_implCompressMB:
+ case vmIntrinsics::_multiplyToLen:
+ case vmIntrinsics::_squareToLen:
+ case vmIntrinsics::_mulAdd:
+ case vmIntrinsics::_montgomeryMultiply:
+ case vmIntrinsics::_montgomerySquare:
+ case vmIntrinsics::_ghash_processBlocks:
+ case vmIntrinsics::_updateCRC32:
+ case vmIntrinsics::_updateBytesCRC32:
+ case vmIntrinsics::_updateByteBufferCRC32:
+ case vmIntrinsics::_updateBytesCRC32C:
+ case vmIntrinsics::_updateDirectByteBufferCRC32C:
+ case vmIntrinsics::_profileBoolean:
+ case vmIntrinsics::_isCompileConstant:
+ break;
+ default:
+ return false;
+ }
+ return true;
+}
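+
+// Example (comment only): on a platform whose architecture description does
+// not support Op_StrComp, Matcher::match_rule_supported(Op_StrComp) returns
+// false, so _compareTo is reported as unsupported above and C2 emits a
+// regular call to String.compareTo instead of the intrinsic.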
+
+bool C2Compiler::is_intrinsic_disabled_by_flag(methodHandle method, methodHandle compilation_context) {
+ vmIntrinsics::ID id = method->intrinsic_id();
+ assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
+
+ if (vmIntrinsics::is_disabled_by_flags(id)) {
+ return true;
+ }
+
+ // Check if the intrinsic corresponding to 'method' has been disabled on
+ // the command line by using the DisableIntrinsic flag (either globally
+ // or on a per-method level, see src/share/vm/compiler/abstractCompiler.hpp
+ // for details).
+ // Usually, the compilation context is the caller of the method 'method'.
+ // The only case when for a non-recursive method 'method' the compilation context
+ // is not the caller of the 'method' (but it is the method itself) is
+ // java.lang.ref.Referene::get.
+ // For java.lang.ref.Reference::get, the intrinsic version is used
+ // instead of the C2-compiled version so that the value in the referent
+ // field can be registered by the G1 pre-barrier code. The intrinsified
+ // version of Reference::get also adds a memory barrier to prevent
+ // commoning reads from the referent field across a safepoint, since GC
+ // can change the referent field's value. See Compile::Compile()
+ // in src/share/vm/opto/compile.cpp for more details.
+ ccstr disable_intr = NULL;
+ if ((DisableIntrinsic[0] != '\0' && strstr(DisableIntrinsic, vmIntrinsics::name_at(id)) != NULL) ||
+ (!compilation_context.is_null() &&
+ CompilerOracle::has_option_value(compilation_context, "DisableIntrinsic", disable_intr) &&
+ strstr(disable_intr, vmIntrinsics::name_at(id)) != NULL)
+ ) {
+ return true;
+ }
+
+ // -XX:-InlineNatives disables nearly all intrinsics except the ones listed in
+ // the following switch statement.
+ if (!InlineNatives) {
+ switch (id) {
+ case vmIntrinsics::_indexOf:
+ case vmIntrinsics::_compareTo:
+ case vmIntrinsics::_equals:
+ case vmIntrinsics::_equalsC:
+ case vmIntrinsics::_getAndAddInt:
+ case vmIntrinsics::_getAndAddLong:
+ case vmIntrinsics::_getAndSetInt:
+ case vmIntrinsics::_getAndSetLong:
+ case vmIntrinsics::_getAndSetObject:
+ case vmIntrinsics::_loadFence:
+ case vmIntrinsics::_storeFence:
+ case vmIntrinsics::_fullFence:
+ case vmIntrinsics::_Reference_get:
+ break;
+ default:
+ return true;
+ }
+ }
+
+ if (!InlineUnsafeOps) {
+ switch (id) {
+ case vmIntrinsics::_loadFence:
+ case vmIntrinsics::_storeFence:
+ case vmIntrinsics::_fullFence:
+ case vmIntrinsics::_compareAndSwapObject:
+ case vmIntrinsics::_compareAndSwapLong:
+ case vmIntrinsics::_compareAndSwapInt:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ return false;
+}
+
int C2Compiler::initial_code_buffer_size() {
assert(SegmentedCodeCache, "Should be only used with a segmented code cache");
return Compile::MAX_inst_size + Compile::MAX_locs_size + initial_const_capacity;
--- a/hotspot/src/share/vm/opto/c2compiler.hpp Mon Jul 27 13:56:26 2015 -0700
+++ b/hotspot/src/share/vm/opto/c2compiler.hpp Fri Jul 31 12:13:57 2015 +0200
@@ -36,7 +36,6 @@
// Name
const char *name() { return "C2"; }
-
void initialize();
// Compilation entry point for methods
@@ -52,6 +51,26 @@
// Print compilation timers and statistics
void print_timers();
+ // Check the availability of an intrinsic for 'method' given a compilation context.
+ virtual bool is_intrinsic_available(methodHandle method, methodHandle compilation_context);
+
+ // Return true if the intrinsification of a method is supported by the
+ // compiler assuming a non-virtual dispatch; return false otherwise.
+ virtual bool is_intrinsic_supported(methodHandle method) {
+ return is_intrinsic_supported(method, false);
+ }
+
+ // Check if the compiler supports an intrinsic for 'method' given the
+ // dispatch mode specified by the 'is_virtual' parameter.
+ virtual bool is_intrinsic_supported(methodHandle method, bool is_virtual);
+
+ // Processing of command-line flags specific to the C2 compiler.
+ virtual bool is_intrinsic_disabled_by_flag(methodHandle method) {
+ return is_intrinsic_disabled_by_flag(method, NULL);
+ }
+
+ virtual bool is_intrinsic_disabled_by_flag(methodHandle method, methodHandle compilation_context);
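+
+ // Illustrative sketch only (assumed usage; the pre-existing checks being
+ // replaced are removed in the library_call.cpp hunk that follows): C2's
+ // intrinsic factory is expected to consult these queries when deciding
+ // whether to create an intrinsic call generator, e.g.
+ //
+ //   if (compiler->is_intrinsic_supported(mh, is_virtual) &&
+ //       !compiler->is_intrinsic_disabled_by_flag(mh, caller_mh)) {
+ //     // create the LibraryIntrinsic call generator
+ //   }
+ //
+ // where 'compiler', 'mh', and 'caller_mh' stand for the C2Compiler instance,
+ // the intrinsic method, and the compilation context (its caller).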
+
// Initial size of the code buffer (may be increased at runtime)
static int initial_code_buffer_size();
};
--- a/hotspot/src/share/vm/opto/library_call.cpp Mon Jul 27 13:56:26 2015 -0700
+++ b/hotspot/src/share/vm/opto/library_call.cpp Fri Jul 31 12:13:57 2015 +0200
@@ -31,6 +31,7 @@
#include "oops/objArrayKlass.hpp"
#include "opto/addnode.hpp"
#include "opto/arraycopynode.hpp"
+#include "opto/c2compiler.hpp"
#include "opto/callGenerator.hpp"
#include "opto/castnode.hpp"
#include "opto/cfgnode.hpp"
@@ -305,330 +306,40 @@
bool inline_isCompileConstant();
};
-
//---------------------------make_vm_intrinsic----------------------------
CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
vmIntrinsics::ID id = m->intrinsic_id();
assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
- ccstr disable_intr = NULL;
-
- if ((DisableIntrinsic[0] != '\0'
- && strstr(DisableIntrinsic, vmIntrinsics::name_at(id)) != NULL) ||
- (method_has_option_value("DisableIntrinsic", disable_intr)
- && strstr(disable_intr, vmIntrinsics::name_at(id)) != NULL)) {
- // disabled by a user request on the command line:
- // example: -XX:DisableIntrinsic=_hashCode,_getClass
- return NULL;
- }
-
if (!m->is_loaded()) {
- // do not attempt to inline unloaded methods
- return NULL;
- }
-
- // Only a few intrinsics implement a virtual dispatch.
- // They are expensive calls which are also frequently overridden.
- if (is_virtual) {
- switch (id) {
- case vmIntrinsics::_hashCode:
- case vmIntrinsics::_clone:
- // OK, Object.hashCode and Object.clone intrinsics come in both flavors
- break;
- default:
- return NULL;
- }
- }
-
- // -XX:-InlineNatives disables nearly all intrinsics:
- if (!InlineNatives) {
- switch (id) {
- case vmIntrinsics::_indexOf:
- case vmIntrinsics::_compareTo:
- case vmIntrinsics::_equals:
- case vmIntrinsics::_equalsC:
- case vmIntrinsics::_getAndAddInt:
- case vmIntrinsics::_getAndAddLong:
- case vmIntrinsics::_getAndSetInt:
- case vmIntrinsics::_getAndSetLong:
- case vmIntrinsics::_getAndSetObject:
- case vmIntrinsics::_loadFence:
- case vmIntrinsics::_storeFence:
- case vmIntrinsics::_fullFence:
- break; // InlineNatives does not control String.compareTo
- case vmIntrinsics::_Reference_get:
- break; // InlineNatives does not control Reference.get
- default:
- return NULL;
- }
- }
-
- int predicates = 0;
- bool does_virtual_dispatch = false;
-
- switch (id) {
- case vmIntrinsics::_compareTo:
- if (!SpecialStringCompareTo) return NULL;
- if (!Matcher::match_rule_supported(Op_StrComp)) return NULL;
- break;
- case vmIntrinsics::_indexOf:
- if (!SpecialStringIndexOf) return NULL;
- break;
- case vmIntrinsics::_equals:
- if (!SpecialStringEquals) return NULL;
- if (!Matcher::match_rule_supported(Op_StrEquals)) return NULL;
- break;
- case vmIntrinsics::_equalsC:
- if (!SpecialArraysEquals) return NULL;
- if (!Matcher::match_rule_supported(Op_AryEq)) return NULL;
- break;
- case vmIntrinsics::_arraycopy:
- if (!InlineArrayCopy) return NULL;
- break;
- case vmIntrinsics::_copyMemory:
- if (StubRoutines::unsafe_arraycopy() == NULL) return NULL;
- if (!InlineArrayCopy) return NULL;
- break;
- case vmIntrinsics::_hashCode:
- if (!InlineObjectHash) return NULL;
- does_virtual_dispatch = true;
- break;
- case vmIntrinsics::_clone:
- does_virtual_dispatch = true;
- case vmIntrinsics::_copyOf:
- case vmIntrinsics::_copyOfRange:
- if (!InlineObjectCopy) return NULL;
- // These also use the arraycopy intrinsic mechanism:
- if (!InlineArrayCopy) return NULL;
- break;
- case vmIntrinsics::_encodeISOArray:
- if (!SpecialEncodeISOArray) return NULL;
- if (!Matcher::match_rule_supported(Op_EncodeISOArray)) return NULL;
- break;
- case vmIntrinsics::_checkIndex:
- // We do not intrinsify this. The optimizer does fine with it.
+ // Do not attempt to inline unloaded methods.
return NULL;
-
- case vmIntrinsics::_getCallerClass:
- if (!InlineReflectionGetCallerClass) return NULL;
- if (SystemDictionary::reflect_CallerSensitive_klass() == NULL) return NULL;
- break;
-
- case vmIntrinsics::_bitCount_i:
- if (!Matcher::match_rule_supported(Op_PopCountI)) return NULL;
- break;
-
- case vmIntrinsics::_bitCount_l:
- if (!Matcher::match_rule_supported(Op_PopCountL)) return NULL;
- break;
-
- case vmIntrinsics::_numberOfLeadingZeros_i:
- if (!Matcher::match_rule_supported(Op_CountLeadingZerosI)) return NULL;
- break;
-
- case vmIntrinsics::_numberOfLeadingZeros_l:
- if (!Matcher::match_rule_supported(Op_CountLeadingZerosL)) return NULL;
- break;
-
- case vmIntrinsics::_numberOfTrailingZeros_i:
- if (!Matcher::match_rule_supported(Op_CountTrailingZerosI)) return NULL;
- break;
-
- case vmIntrinsics::_numberOfTrailingZeros_l:
- if (!Matcher::match_rule_supported(Op_CountTrailingZerosL)) return NULL;
- break;
-
- case vmIntrinsics::_reverseBytes_c:
- if (!Matcher::match_rule_supported(Op_ReverseBytesUS)) return NULL;
- break;
- case vmIntrinsics::_reverseBytes_s:
- if (!Matcher::match_rule_supported(Op_ReverseBytesS)) return NULL;
- break;
- case vmIntrinsics::_reverseBytes_i:
- if (!Matcher::match_rule_supported(Op_ReverseBytesI)) return NULL;
- break;
- case vmIntrinsics::_reverseBytes_l:
- if (!Matcher::match_rule_supported(Op_ReverseBytesL)) return NULL;
- break;
-
- case vmIntrinsics::_Reference_get:
- // Use the intrinsic version of Reference.get() so that the value in
- // the referent field can be registered by the G1 pre-barrier code.
- // Also add memory barrier to prevent commoning reads from this field
- // across safepoint since GC can change it value.
- break;
-
- case vmIntrinsics::_compareAndSwapObject:
-#ifdef _LP64
- if (!UseCompressedOops && !Matcher::match_rule_supported(Op_CompareAndSwapP)) return NULL;
-#endif
- break;
-
- case vmIntrinsics::_compareAndSwapLong:
- if (!Matcher::match_rule_supported(Op_CompareAndSwapL)) return NULL;
- break;
-
- case vmIntrinsics::_getAndAddInt:
- if (!Matcher::match_rule_supported(Op_GetAndAddI)) return NULL;
- break;
-
- case vmIntrinsics::_getAndAddLong:
- if (!Matcher::match_rule_supported(Op_GetAndAddL)) return NULL;
- break;
-
- case vmIntrinsics::_getAndSetInt:
- if (!Matcher::match_rule_supported(Op_GetAndSetI)) return NULL;
- break;
-
- case vmIntrinsics::_getAndSetLong:
- if (!Matcher::match_rule_supported(Op_GetAndSetL)) return NULL;
- break;
-
- case vmIntrinsics::_getAndSetObject:
-#ifdef _LP64
- if (!UseCompressedOops && !Matcher::match_rule_supported(Op_GetAndSetP)) return NULL;
- if (UseCompressedOops && !Matcher::match_rule_supported(Op_GetAndSetN)) return NULL;
- break;
-#else
- if (!Matcher::match_rule_supported(Op_GetAndSetP)) return NULL;
- break;
-#endif
-
- case vmIntrinsics::_aescrypt_encryptBlock:
- case vmIntrinsics::_aescrypt_decryptBlock:
- if (!UseAESIntrinsics) return NULL;
- break;
-
- case vmIntrinsics::_multiplyToLen:
- if (!UseMultiplyToLenIntrinsic) return NULL;
- break;
-
- case vmIntrinsics::_squareToLen:
- if (!UseSquareToLenIntrinsic) return NULL;
- break;
-
- case vmIntrinsics::_mulAdd:
- if (!UseMulAddIntrinsic) return NULL;
- break;
-
- case vmIntrinsics::_montgomeryMultiply:
- if (!UseMontgomeryMultiplyIntrinsic) return NULL;
- break;
- case vmIntrinsics::_montgomerySquare:
- if (!UseMontgomerySquareIntrinsic) return NULL;
- break;
-
- case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
- case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
- if (!UseAESIntrinsics) return NULL;
- // these two require the predicated logic
- predicates = 1;
- break;
-
- case vmIntrinsics::_sha_implCompress:
- if (!UseSHA1Intrinsics) return NULL;
- break;
-
- case vmIntrinsics::_sha2_implCompress:
- if (!UseSHA256Intrinsics) return NULL;
- break;
-
- case vmIntrinsics::_sha5_implCompress:
- if (!UseSHA512Intrinsics) return NULL;
- break;
-
- case vmIntrinsics::_digestBase_implCompressMB:
- if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) return NULL;
- predicates = 3;
- break;
-
- case vmIntrinsics::_ghash_processBlocks:
- if (!UseGHASHIntrinsics) return NULL;
- break;
-
- case vmIntrinsics::_updateCRC32:
- case vmIntrinsics::_updateBytesCRC32:
- case vmIntrinsics::_updateByteBufferCRC32:
- if (!UseCRC32Intrinsics) return NULL;
- break;
-
- case vmIntrinsics::_updateBytesCRC32C:
- case vmIntrinsics::_updateDirectByteBufferCRC32C:
- if (!UseCRC32CIntrinsics) return NULL;
- break;
-
- case vmIntrinsics::_incrementExactI:
- case vmIntrinsics::_addExactI:
- if (!Matcher::match_rule_supported(Op_OverflowAddI) || !UseMathExactIntrinsics) return NULL;
- break;
- case vmIntrinsics::_incrementExactL:
- case vmIntrinsics::_addExactL:
- if (!Matcher::match_rule_supported(Op_OverflowAddL) || !UseMathExactIntrinsics) return NULL;
- break;
- case vmIntrinsics::_decrementExactI:
- case vmIntrinsics::_subtractExactI:
- if (!Matcher::match_rule_supported(Op_OverflowSubI) || !UseMathExactIntrinsics) return NULL;
- break;
- case vmIntrinsics::_decrementExactL:
- case vmIntrinsics::_subtractExactL:
- if (!Matcher::match_rule_supported(Op_OverflowSubL) || !UseMathExactIntrinsics) return NULL;
- break;
- case vmIntrinsics::_negateExactI:
- if (!Matcher::match_rule_supported(Op_OverflowSubI) || !UseMathExactIntrinsics) return NULL;
- break;
- case vmIntrinsics::_negateExactL:
- if (!Matcher::match_rule_supported(Op_OverflowSubL) || !UseMathExactIntrinsics) return NULL;
- break;
- case vmIntrinsics::_multiplyExactI:
- if (!Matcher::match_rule_supported(Op_OverflowMulI) || !UseMathExactIntrinsics) return NULL;
- break;
- case vmIntrinsics::_multiplyExactL:
- if (!Matcher::match_rule_supported(Op_OverflowMulL) || !UseMathExactIntrinsics) return NULL;
- break;
-
- case vmIntrinsics::_getShortUnaligned:
- case vmIntrinsics::_getCharUnaligned:
- case vmIntrinsics::_getIntUnaligned:
- case vmIntrinsics::_getLongUnaligned:
- case vmIntrinsics::_putShortUnaligned:
- case vmIntrinsics::_putCharUnaligned:
- case vmIntrinsics::_putIntUnaligned:
- case vmIntrinsics::_putLongUnaligned:
- if (!UseUnalignedAccesses) return NULL;
- break;
-
- default:
+ }
+
+ C2Compiler* compiler = (C2Compiler*)CompileBroker::compiler(CompLevel_full_optimization);
+ bool is_available = false;
+
+ {
+ // To call is_intrinsic_supported and is_intrinsic_disabled_by_flag,
+ // the compiler must transition to the '_thread_in_vm' state because
+ // both methods access VM-internal data.
+ VM_ENTRY_MARK;
+ methodHandle mh(THREAD, m->get_Method());
+ methodHandle ct(THREAD, method()->get_Method());
+ is_available = compiler->is_intrinsic_supported(mh, is_virtual) &&
+ !compiler->is_intrinsic_disabled_by_flag(mh, ct);
+ }
+
+ if (is_available) {
assert(id <= vmIntrinsics::LAST_COMPILER_INLINE, "caller responsibility");
assert(id != vmIntrinsics::_Object_init && id != vmIntrinsics::_invoke, "enum out of order?");
- break;
- }
-
- // -XX:-InlineClassNatives disables natives from the Class class.
- // The flag applies to all reflective calls, notably Array.newArray
- // (visible to Java programmers as Array.newInstance).
- if (m->holder()->name() == ciSymbol::java_lang_Class() ||
- m->holder()->name() == ciSymbol::java_lang_reflect_Array()) {
- if (!InlineClassNatives) return NULL;
- }
-
- // -XX:-InlineThreadNatives disables natives from the Thread class.
- if (m->holder()->name() == ciSymbol::java_lang_Thread()) {
- if (!InlineThreadNatives) return NULL;
- }
-
- // -XX:-InlineMathNatives disables natives from the Math,Float and Double classes.
- if (m->holder()->name() == ciSymbol::java_lang_Math() ||
- m->holder()->name() == ciSymbol::java_lang_Float() ||
- m->holder()->name() == ciSymbol::java_lang_Double()) {
- if (!InlineMathNatives) return NULL;
- }
-
- // -XX:-InlineUnsafeOps disables natives from the Unsafe class.
- if (m->holder()->name() == ciSymbol::sun_misc_Unsafe()) {
- if (!InlineUnsafeOps) return NULL;
- }
-
- return new LibraryIntrinsic(m, is_virtual, predicates, does_virtual_dispatch, (vmIntrinsics::ID) id);
+ return new LibraryIntrinsic(m, is_virtual,
+ vmIntrinsics::predicates_needed(id),
+ vmIntrinsics::does_virtual_dispatch(id),
+ (vmIntrinsics::ID) id);
+ } else {
+ return NULL;
+ }
}
//----------------------register_library_intrinsics-----------------------
@@ -812,7 +523,6 @@
case vmIntrinsics::_getLong: return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG, !is_volatile);
case vmIntrinsics::_getFloat: return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT, !is_volatile);
case vmIntrinsics::_getDouble: return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE, !is_volatile);
-
case vmIntrinsics::_putObject: return inline_unsafe_access(!is_native_ptr, is_store, T_OBJECT, !is_volatile);
case vmIntrinsics::_putBoolean: return inline_unsafe_access(!is_native_ptr, is_store, T_BOOLEAN, !is_volatile);
case vmIntrinsics::_putByte: return inline_unsafe_access(!is_native_ptr, is_store, T_BYTE, !is_volatile);
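
The net effect of the library_call.cpp change above is that availability is no longer decided by the long, hand-maintained switch over intrinsic IDs, but by two C2Compiler queries: the intrinsic must be supported on the current platform and must not be disabled by a flag. The sketch below is only a plain-Java model of that decision, not VM code; the booleans stand in for is_intrinsic_supported() and is_intrinsic_disabled_by_flag().

// Plain-Java model (not VM code) of the availability decision now made in
// library_call.cpp: an intrinsic is turned into a LibraryIntrinsic only if it
// is supported and not disabled by a command-line flag.
public class IntrinsicAvailabilityModel {

    // Stand-in for C2Compiler::is_intrinsic_supported(mh, is_virtual).
    static boolean isSupported(boolean matcherHasRule, boolean platformAllowsIt) {
        return matcherHasRule && platformAllowsIt;
    }

    // Mirrors: is_available = supported && !disabled_by_flag.
    static boolean isAvailable(boolean supported, boolean disabledByFlag) {
        return supported && !disabledByFlag;
    }

    public static void main(String[] args) {
        boolean supported = isSupported(true, true);
        System.out.println("enabled:  " + isAvailable(supported, false)); // true
        System.out.println("disabled: " + isAvailable(supported, true));  // false
    }
}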
--- a/hotspot/src/share/vm/prims/whitebox.cpp Mon Jul 27 13:56:26 2015 -0700
+++ b/hotspot/src/share/vm/prims/whitebox.cpp Fri Jul 31 12:13:57 2015 +0200
@@ -528,6 +528,24 @@
return mh->queued_for_compilation();
WB_END
+WB_ENTRY(jboolean, WB_IsIntrinsicAvailable(JNIEnv* env, jobject o, jobject method, jobject compilation_context, jint compLevel))
+ if (compLevel < CompLevel_none || compLevel > CompLevel_highest_tier) {
+ return false; // Intrinsic is not available on a non-existent compilation level.
+ }
+ jmethodID method_id, compilation_context_id;
+ method_id = reflected_method_to_jmid(thread, env, method);
+ CHECK_JNI_EXCEPTION_(env, JNI_FALSE);
+ methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(method_id));
+ if (compilation_context != NULL) {
+ compilation_context_id = reflected_method_to_jmid(thread, env, compilation_context);
+ CHECK_JNI_EXCEPTION_(env, JNI_FALSE);
+ methodHandle cch(THREAD, Method::checked_resolve_jmethod_id(compilation_context_id));
+ return CompileBroker::compiler(compLevel)->is_intrinsic_available(mh, cch);
+ } else {
+ return CompileBroker::compiler(compLevel)->is_intrinsic_available(mh, NULL);
+ }
+WB_END
+
WB_ENTRY(jint, WB_GetMethodCompilationLevel(JNIEnv* env, jobject o, jobject method, jboolean is_osr))
jmethodID jmid = reflected_method_to_jmid(thread, env, method);
CHECK_JNI_EXCEPTION_(env, CompLevel_none);
@@ -1477,14 +1495,17 @@
#endif // INCLUDE_NMT
{CC"deoptimizeFrames", CC"(Z)I", (void*)&WB_DeoptimizeFrames },
{CC"deoptimizeAll", CC"()V", (void*)&WB_DeoptimizeAll },
- {CC"deoptimizeMethod0", CC"(Ljava/lang/reflect/Executable;Z)I",
- (void*)&WB_DeoptimizeMethod },
+ {CC"deoptimizeMethod0", CC"(Ljava/lang/reflect/Executable;Z)I",
+ (void*)&WB_DeoptimizeMethod },
{CC"isMethodCompiled0", CC"(Ljava/lang/reflect/Executable;Z)Z",
(void*)&WB_IsMethodCompiled },
{CC"isMethodCompilable0", CC"(Ljava/lang/reflect/Executable;IZ)Z",
(void*)&WB_IsMethodCompilable},
{CC"isMethodQueuedForCompilation0",
CC"(Ljava/lang/reflect/Executable;)Z", (void*)&WB_IsMethodQueuedForCompilation},
+ {CC"isIntrinsicAvailable0",
+ CC"(Ljava/lang/reflect/Executable;Ljava/lang/reflect/Executable;I)Z",
+ (void*)&WB_IsIntrinsicAvailable},
{CC"makeMethodNotCompilable0",
CC"(Ljava/lang/reflect/Executable;IZ)V", (void*)&WB_MakeMethodNotCompilable},
{CC"testSetDontInlineMethod0",
--- a/hotspot/src/share/vm/runtime/arguments.cpp Mon Jul 27 13:56:26 2015 -0700
+++ b/hotspot/src/share/vm/runtime/arguments.cpp Fri Jul 31 12:13:57 2015 +0200
@@ -1226,32 +1226,6 @@
}
}
-/**
- * Returns the minimum number of compiler threads needed to run the JVM. The following
- * configurations are possible.
- *
- * 1) The JVM is build using an interpreter only. As a result, the minimum number of
- * compiler threads is 0.
- * 2) The JVM is build using the compiler(s) and tiered compilation is disabled. As
- * a result, either C1 or C2 is used, so the minimum number of compiler threads is 1.
- * 3) The JVM is build using the compiler(s) and tiered compilation is enabled. However,
- * the option "TieredStopAtLevel < CompLevel_full_optimization". As a result, only
- * C1 can be used, so the minimum number of compiler threads is 1.
- * 4) The JVM is build using the compilers and tiered compilation is enabled. The option
- * 'TieredStopAtLevel = CompLevel_full_optimization' (the default value). As a result,
- * the minimum number of compiler threads is 2.
- */
-int Arguments::get_min_number_of_compiler_threads() {
-#if !defined(COMPILER1) && !defined(COMPILER2) && !defined(SHARK)
- return 0; // case 1
-#else
- if (!TieredCompilation || (TieredStopAtLevel < CompLevel_full_optimization)) {
- return 1; // case 2 or case 3
- }
- return 2; // case 4 (tiered)
-#endif
-}
-
#if INCLUDE_ALL_GCS
static void disable_adaptive_size_policy(const char* collector_name) {
if (UseAdaptiveSizePolicy) {
@@ -2199,10 +2173,6 @@
status = false;
}
- int min_number_of_compiler_threads = get_min_number_of_compiler_threads();
- // The default CICompilerCount's value is CI_COMPILER_COUNT.
- assert(min_number_of_compiler_threads <= CI_COMPILER_COUNT, "minimum should be less or equal default number");
-
if (!FLAG_IS_DEFAULT(CICompilerCount) && !FLAG_IS_DEFAULT(CICompilerCountPerCPU) && CICompilerCountPerCPU) {
warning("The VM option CICompilerCountPerCPU overrides CICompilerCount.");
}
--- a/hotspot/src/share/vm/runtime/arguments.hpp Mon Jul 27 13:56:26 2015 -0700
+++ b/hotspot/src/share/vm/runtime/arguments.hpp Fri Jul 31 12:13:57 2015 +0200
@@ -445,9 +445,6 @@
static char* SharedArchivePath;
public:
- // Tiered
- static int get_min_number_of_compiler_threads();
-
// Scale compile thresholds
// Returns threshold scaled with CompileThresholdScaling
static intx scaled_compile_threshold(intx threshold, double scale);
--- a/hotspot/src/share/vm/runtime/commandLineFlagConstraintsCompiler.cpp Mon Jul 27 13:56:26 2015 -0700
+++ b/hotspot/src/share/vm/runtime/commandLineFlagConstraintsCompiler.cpp Fri Jul 31 12:13:57 2015 +0200
@@ -41,3 +41,45 @@
return Flag::SUCCESS;
}
}
+
+/**
+ * Validate the minimum number of compiler threads needed to run the
+ * JVM. The following configurations are possible.
+ *
+ * 1) The JVM is built using an interpreter only. As a result, the minimum number of
+ * compiler threads is 0.
+ * 2) The JVM is built using the compiler(s) and tiered compilation is disabled. As
+ * a result, either C1 or C2 is used, so the minimum number of compiler threads is 1.
+ * 3) The JVM is built using the compiler(s) and tiered compilation is enabled, but
+ * TieredStopAtLevel is set below CompLevel_full_optimization. As a result, only
+ * C1 can be used, so the minimum number of compiler threads is 1.
+ * 4) The JVM is built using the compilers and tiered compilation is enabled, with
+ * TieredStopAtLevel at its default value, CompLevel_full_optimization. As a result,
+ * the minimum number of compiler threads is 2.
+ */
+Flag::Error CICompilerCountConstraintFunc(bool verbose, intx* value) {
+ int min_number_of_compiler_threads = 0;
+#if !defined(COMPILER1) && !defined(COMPILER2) && !defined(SHARK)
+ // case 1
+#else
+ if (!TieredCompilation || (TieredStopAtLevel < CompLevel_full_optimization)) {
+ min_number_of_compiler_threads = 1; // case 2 or case 3
+ } else {
+ min_number_of_compiler_threads = 2; // case 4 (tiered)
+ }
+#endif
+
+ // The default CICompilerCount's value is CI_COMPILER_COUNT.
+ assert(min_number_of_compiler_threads <= CI_COMPILER_COUNT, "minimum must be less than or equal to the default number");
+
+ if (*value < (intx)min_number_of_compiler_threads) {
+ if (verbose) {
+ jio_fprintf(defaultStream::error_stream(),
+ "CICompilerCount=" INTX_FORMAT " must be at least %d\n",
+ *value, min_number_of_compiler_threads);
+ }
+ return Flag::VIOLATES_CONSTRAINT;
+ } else {
+ return Flag::SUCCESS;
+ }
+}
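
The constraint above encodes the same four configurations that the removed Arguments::get_min_number_of_compiler_threads() handled. As a quick sanity check of that table, here is a plain-Java model (not VM code) of the minimum thread count as a function of the build and tiering settings:

// Model (not VM code) of the minimum CICompilerCount accepted by
// CICompilerCountConstraintFunc for the four documented configurations.
public class MinCompilerThreadsModel {
    static int minCompilerThreads(boolean hasCompiler, boolean tiered, boolean stopsBeforeC2) {
        if (!hasCompiler) return 0;              // case 1: interpreter-only build
        if (!tiered || stopsBeforeC2) return 1;  // cases 2 and 3: only C1 or only C2 runs
        return 2;                                // case 4: tiered, C1 and C2 both run
    }

    public static void main(String[] args) {
        System.out.println("interpreter only : " + minCompilerThreads(false, false, false)); // 0
        System.out.println("non-tiered       : " + minCompilerThreads(true, false, false));  // 1
        System.out.println("tiered, C1 only  : " + minCompilerThreads(true, true, true));    // 1
        System.out.println("tiered, full     : " + minCompilerThreads(true, true, false));   // 2
    }
}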
--- a/hotspot/src/share/vm/runtime/commandLineFlagConstraintsCompiler.hpp Mon Jul 27 13:56:26 2015 -0700
+++ b/hotspot/src/share/vm/runtime/commandLineFlagConstraintsCompiler.hpp Fri Jul 31 12:13:57 2015 +0200
@@ -36,4 +36,6 @@
Flag::Error AliasLevelConstraintFunc(bool verbose, intx* value);
+Flag::Error CICompilerCountConstraintFunc(bool verbose, intx* value);
+
#endif /* SHARE_VM_RUNTIME_COMMANDLINEFLAGCONSTRAINTSCOMPILER_HPP */
--- a/hotspot/src/share/vm/runtime/globals.hpp Mon Jul 27 13:56:26 2015 -0700
+++ b/hotspot/src/share/vm/runtime/globals.hpp Fri Jul 31 12:13:57 2015 +0200
@@ -2643,8 +2643,8 @@
/* because of overflow issue */ \
product(intx, CICompilerCount, CI_COMPILER_COUNT, \
"Number of compiler threads to run") \
- range((intx)Arguments::get_min_number_of_compiler_threads(), \
- max_jint) \
+ range(0, max_jint) \
+ constraint(CICompilerCountConstraintFunc, AtParse) \
\
product(intx, CompilationPolicyChoice, 0, \
"which compilation policy (0-3)") \
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/arguments/CheckCICompilerCount.java Fri Jul 31 12:13:57 2015 +0200
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import jdk.test.lib.*;
+
+/*
+ * @test CheckCICompilerCount
+ * @bug 8130858
+ * @summary Check that the correct range of values for CICompilerCount is allowed, depending on whether tiered compilation is enabled or not
+ * @library /testlibrary
+ * @modules java.base/sun.misc
+ * java.management
+ * @run main CheckCICompilerCount
+ */
+
+public class CheckCICompilerCount {
+ private static final String[][] NON_TIERED_ARGUMENTS = {
+ {
+ "-XX:-TieredCompilation",
+ "-XX:+PrintFlagsFinal",
+ "-XX:CICompilerCount=0",
+ "-version"
+ },
+ {
+ "-XX:-TieredCompilation",
+ "-XX:+PrintFlagsFinal",
+ "-XX:CICompilerCount=1",
+ "-version"
+ }
+ };
+
+ private static final String[][] NON_TIERED_EXPECTED_OUTPUTS = {
+ {
+ "CICompilerCount=0 must be at least 1",
+ "Improperly specified VM option 'CICompilerCount=0'"
+ },
+ {
+ "intx CICompilerCount := 1 {product}"
+ }
+ };
+
+ private static final int[] NON_TIERED_EXIT = {
+ 1,
+ 0
+ };
+
+ private static final String[][] TIERED_ARGUMENTS = {
+ {
+ "-XX:+TieredCompilation",
+ "-XX:+PrintFlagsFinal",
+ "-XX:CICompilerCount=1",
+ "-version"
+ },
+ {
+ "-XX:+TieredCompilation",
+ "-XX:+PrintFlagsFinal",
+ "-XX:CICompilerCount=2",
+ "-version"
+ }
+ };
+
+ private static final String[][] TIERED_EXPECTED_OUTPUTS = {
+ {
+ "CICompilerCount=1 must be at least 2",
+ "Improperly specified VM option 'CICompilerCount=1'"
+ },
+ {
+ "intx CICompilerCount := 2 {product}"
+ }
+ };
+
+ private static final int[] TIERED_EXIT = {
+ 1,
+ 0
+ };
+
+ private static void verifyValidOption(String[] arguments, String[] expected_outputs, int exit, boolean tiered) throws Exception {
+ ProcessBuilder pb;
+ OutputAnalyzer out;
+
+ pb = ProcessTools.createJavaProcessBuilder(arguments);
+ out = new OutputAnalyzer(pb.start());
+
+ try {
+ out.shouldHaveExitValue(exit);
+ for (String expected_output : expected_outputs) {
+ out.shouldContain(expected_output);
+ }
+ } catch (RuntimeException e) {
+ // Check if tiered compilation is available in this JVM version.
+ // Rethrow the exception only if it is available.
+ if (!(tiered && out.getOutput().contains("TieredCompilation is disabled in this release."))) {
+ throw new RuntimeException(e);
+ }
+ }
+ }
+
+ public static void main(String[] args) throws Exception {
+ if (NON_TIERED_ARGUMENTS.length != NON_TIERED_EXPECTED_OUTPUTS.length || NON_TIERED_ARGUMENTS.length != NON_TIERED_EXIT.length) {
+ throw new RuntimeException("Test is set up incorrectly: length of arguments, expected outputs and exit codes in non-tiered mode of operation do not match.");
+ }
+
+ if (TIERED_ARGUMENTS.length != TIERED_EXPECTED_OUTPUTS.length || TIERED_ARGUMENTS.length != TIERED_EXIT.length) {
+ throw new RuntimeException("Test is set up incorrectly: length of arguments, expected outputs and exit codes in tiered mode of operation do not match.");
+ }
+
+ for (int i = 0; i < NON_TIERED_ARGUMENTS.length; i++) {
+ verifyValidOption(NON_TIERED_ARGUMENTS[i], NON_TIERED_EXPECTED_OUTPUTS[i], NON_TIERED_EXIT[i], false);
+ }
+
+ for (int i = 0; i < TIERED_ARGUMENTS.length; i++) {
+ verifyValidOption(TIERED_ARGUMENTS[i], TIERED_EXPECTED_OUTPUTS[i], TIERED_EXIT[i], true);
+ }
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/intrinsics/IntrinsicAvailableTest.java Fri Jul 31 12:13:57 2015 +0200
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+import java.lang.reflect.Executable;
+import java.util.concurrent.Callable;
+import java.util.Objects;
+/*
+ * @test
+ * @bug 8130832
+ * @library /testlibrary /../../test/lib /compiler/whitebox /compiler/testlibrary
+ * @build IntrinsicAvailableTest
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ * sun.hotspot.WhiteBox$WhiteBoxPermission
+ * @run main/othervm -Xbootclasspath/a:.
+ * -XX:+UnlockDiagnosticVMOptions
+ * -XX:+WhiteBoxAPI
+ * -XX:+UseCRC32Intrinsics
+ * IntrinsicAvailableTest
+ * @run main/othervm -Xbootclasspath/a:.
+ * -XX:+UnlockDiagnosticVMOptions
+ * -XX:+WhiteBoxAPI
+ * -XX:-UseCRC32Intrinsics
+ * IntrinsicAvailableTest
+ */
+public class IntrinsicAvailableTest extends CompilerWhiteBoxTest {
+ protected String VMName;
+
+ public IntrinsicAvailableTest(IntrinsicAvailableTestTestCase testCase) {
+ super(testCase);
+ VMName = System.getProperty("java.vm.name");
+ }
+
+ public static class IntrinsicAvailableTestTestCase implements TestCase {
+
+ public String name() {
+ return "IntrinsicAvailableTestTestCase";
+ }
+
+ public Executable getExecutable() {
+ // Using a single method to test the
+ // WhiteBox.isIntrinsicAvailable(Executable method, int compLevel)
+ // call at the compilation levels corresponding to both the C1 and
+ // the C2 compiler keeps the current test simple.
+ //
+ // The tested method is java.util.zip.CRC32.update(int, int) because
+ // both C1 and C2 define an intrinsic for the method and
+ // the UseCRC32Intrinsics flag can be used to enable/disable
+ // intrinsification of the method in both product and fastdebug
+ // builds.
+ try {
+ return Class.forName("java.util.zip.CRC32").getDeclaredMethod("update", int.class, int.class);
+ } catch (NoSuchMethodException e) {
+ throw new RuntimeException("Test bug, method unavailable. " + e);
+ } catch (ClassNotFoundException e) {
+ throw new RuntimeException("Test bug, class unavailable. " + e);
+ }
+ }
+
+ public Callable<Integer> getCallable() {
+ return null;
+ }
+
+ public boolean isOsr() {
+ return false;
+ }
+
+ }
+
+ protected void checkIntrinsicForCompilationLevel(Executable method, int compLevel) throws Exception {
+ boolean intrinsicEnabled = Boolean.valueOf(getVMOption("UseCRC32Intrinsics"));
+ boolean intrinsicAvailable = WHITE_BOX.isIntrinsicAvailable(method,
+ compLevel);
+
+ String intrinsicEnabledMessage = intrinsicEnabled ? "enabled" : "disabled";
+ String intrinsicAvailableMessage = intrinsicAvailable ? "available" : "not available";
+
+ if (intrinsicEnabled == intrinsicAvailable) {
+ System.out.println("Expected result: intrinsic for java.util.zip.CRC32.update() is " +
+ intrinsicEnabledMessage + " and intrinsic is " + intrinsicAvailableMessage +
+ " at compilation level " + compLevel);
+ } else {
+ throw new RuntimeException("Unexpected result: intrinsic for java.util.zip.CRC32.update() is " +
+ intrinsicEnabledMessage + " but intrinsic is " + intrinsicAvailableMessage +
+ " at compilation level " + compLevel);
+ }
+ }
+
+ protected boolean isServerVM() {
+ return VMName.toLowerCase().contains("server");
+ }
+
+ public void test() throws Exception {
+ Executable intrinsicMethod = testCase.getExecutable();
+ if (isServerVM()) {
+ if (TIERED_COMPILATION) {
+ checkIntrinsicForCompilationLevel(intrinsicMethod, COMP_LEVEL_SIMPLE);
+ }
+ checkIntrinsicForCompilationLevel(intrinsicMethod, COMP_LEVEL_FULL_OPTIMIZATION);
+ } else {
+ checkIntrinsicForCompilationLevel(intrinsicMethod, COMP_LEVEL_SIMPLE);
+ }
+ }
+
+ public static void main(String args[]) throws Exception {
+ new IntrinsicAvailableTest(new IntrinsicAvailableTestTestCase()).test();
+ }
+}
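
IntrinsicAvailableTest exercises only the two-argument WhiteBox call. The native entry added to whitebox.cpp also accepts a compilation context (a second Executable), which is handed to is_intrinsic_available so that the compiling method can influence the answer. The sketch below is hypothetical: it assumes a matching three-argument wrapper in sun.hotspot.WhiteBox, which this patch does not show on the Java side.

import java.lang.reflect.Executable;
import sun.hotspot.WhiteBox;

// Hypothetical sketch: pass a compilation context alongside the queried method,
// mirroring the (method, compilation_context, compLevel) signature of the
// isIntrinsicAvailable0 native entry. The three-argument Java wrapper is an
// assumption and is not part of this diff.
public class ContextualIntrinsicQuerySketch {
    private static final WhiteBox WB = WhiteBox.getWhiteBox();
    private static final int COMP_LEVEL_FULL_OPTIMIZATION = 4; // C2

    public static void main(String[] args) throws Exception {
        Executable intrinsicMethod = Class.forName("java.util.zip.CRC32")
                .getDeclaredMethod("update", int.class, int.class);
        Executable compilationContext =
            ContextualIntrinsicQuerySketch.class.getDeclaredMethod("main", String[].class);
        boolean available = WB.isIntrinsicAvailable(intrinsicMethod, compilationContext,
                                                    COMP_LEVEL_FULL_OPTIMIZATION);
        System.out.println("available in context: " + available);
    }
}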
--- a/hotspot/test/compiler/intrinsics/mathexact/sanity/IntrinsicBase.java Mon Jul 27 13:56:26 2015 -0700
+++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/IntrinsicBase.java Fri Jul 31 12:13:57 2015 +0200
@@ -67,7 +67,7 @@
compileAtLevel(CompilerWhiteBoxTest.COMP_LEVEL_SIMPLE);
}
- if (!isIntrinsicSupported()) {
+ if (!isIntrinsicAvailable()) {
expectedIntrinsicCount = 0;
}
break;
@@ -114,7 +114,11 @@
}
}
- protected abstract boolean isIntrinsicSupported();
+ // An intrinsic is available if:
+ // - the intrinsic is enabled (by using the appropriate command-line flag) and
+ // - the intrinsic is supported by the VM (i.e., the platform on which the VM is
+ // running provides the instructions necessary for the VM to generate the intrinsic).
+ protected abstract boolean isIntrinsicAvailable();
protected abstract String getIntrinsicId();
@@ -123,14 +127,20 @@
}
static class IntTest extends IntrinsicBase {
+
+ protected boolean isIntrinsicAvailable; // Whether the tested intrinsic is available on the current platform.
+
protected IntTest(MathIntrinsic.IntIntrinsic testCase) {
super(testCase);
+ // Only the C2 compiler intrinsifies exact math methods,
+ // so check whether the intrinsics are available with C2.
+ isIntrinsicAvailable = WHITE_BOX.isIntrinsicAvailable(testCase.getTestMethod(),
+ COMP_LEVEL_FULL_OPTIMIZATION);
}
@Override
- protected boolean isIntrinsicSupported() {
- return isServerVM() && Boolean.valueOf(useMathExactIntrinsics)
- && (Platform.isX86() || Platform.isX64() || Platform.isAArch64());
+ protected boolean isIntrinsicAvailable() {
+ return isIntrinsicAvailable;
}
@Override
@@ -140,14 +150,20 @@
}
static class LongTest extends IntrinsicBase {
+
+ protected boolean isIntrinsicAvailable; // Whether the tested intrinsic is available on the current platform.
+
protected LongTest(MathIntrinsic.LongIntrinsic testCase) {
super(testCase);
+ // Only the C2 compiler intrinsifies exact math methods,
+ // so check whether the intrinsics are available with C2.
+ isIntrinsicAvailable = WHITE_BOX.isIntrinsicAvailable(testCase.getTestMethod(),
+ COMP_LEVEL_FULL_OPTIMIZATION);
}
@Override
- protected boolean isIntrinsicSupported() {
- return isServerVM() && Boolean.valueOf(useMathExactIntrinsics) &&
- (Platform.isX64() || Platform.isPPC() || Platform.isAArch64());
+ protected boolean isIntrinsicAvailable() {
+ return isIntrinsicAvailable;
}
@Override
--- a/hotspot/test/compiler/intrinsics/mathexact/sanity/MathIntrinsic.java Mon Jul 27 13:56:26 2015 -0700
+++ b/hotspot/test/compiler/intrinsics/mathexact/sanity/MathIntrinsic.java Fri Jul 31 12:13:57 2015 +0200
@@ -29,11 +29,21 @@
enum IntIntrinsic implements CompilerWhiteBoxTest.TestCase {
Add {
@Override
+ Executable testMethod() throws NoSuchMethodException, ClassNotFoundException {
+ return Class.forName("java.lang.Math").getDeclaredMethod("addExact", int.class, int.class);
+ }
+
+ @Override
Object execMathMethod() {
return intR = Math.addExact(int1, int2);
}
},
- Subtract {
+ Subtract {
+ @Override
+ Executable testMethod() throws NoSuchMethodException, ClassNotFoundException {
+ return Class.forName("java.lang.Math").getDeclaredMethod("subtractExact", int.class, int.class);
+ }
+
@Override
Object execMathMethod() {
return intR = Math.subtractExact(int1, int2);
@@ -41,34 +51,66 @@
},
Multiply {
@Override
+ Executable testMethod() throws NoSuchMethodException, ClassNotFoundException {
+ return Class.forName("java.lang.Math").getDeclaredMethod("multiplyExact", int.class, int.class);
+ }
+
+ @Override
Object execMathMethod() {
return intR = Math.multiplyExact(int1, int2);
}
},
Increment {
@Override
+ Executable testMethod() throws NoSuchMethodException, ClassNotFoundException {
+ return Class.forName("java.lang.Math").getDeclaredMethod("incrementExact", int.class);
+ }
+
+ @Override
Object execMathMethod() {
return intR = Math.incrementExact(int1);
}
},
Decrement {
@Override
+ Executable testMethod() throws NoSuchMethodException, ClassNotFoundException {
+ return Class.forName("java.lang.Math").getDeclaredMethod("decrementExact", int.class);
+ }
+
+ @Override
Object execMathMethod() {
return intR = Math.decrementExact(int1);
}
},
Negate {
@Override
+ Executable testMethod() throws NoSuchMethodException, ClassNotFoundException {
+ return Class.forName("java.lang.Math").getDeclaredMethod("negateExact", int.class);
+ }
+
+ @Override
Object execMathMethod() {
return intR = Math.negateExact(int1);
}
};
+
protected int int1;
protected int int2;
protected int intR;
+ abstract Executable testMethod() throws NoSuchMethodException, ClassNotFoundException;
abstract Object execMathMethod();
+ public Executable getTestMethod() {
+ try {
+ return testMethod();
+ } catch (NoSuchMethodException e) {
+ throw new RuntimeException("Test bug, no such method: " + e);
+ } catch (ClassNotFoundException e) {
+ throw new RuntimeException("Test bug, no such class: " + e);
+ }
+ }
+
@Override
public Executable getExecutable() {
try {
@@ -93,36 +135,66 @@
enum LongIntrinsic implements CompilerWhiteBoxTest.TestCase {
Add {
@Override
+ Executable testMethod() throws NoSuchMethodException, ClassNotFoundException {
+ return Class.forName("java.lang.Math").getDeclaredMethod("addExact", long.class, long.class);
+ }
+
+ @Override
Object execMathMethod() {
return longR = Math.addExact(long1, long2);
}
},
Subtract {
@Override
+ Executable testMethod() throws NoSuchMethodException, ClassNotFoundException {
+ return Class.forName("java.lang.Math").getDeclaredMethod("subtractExact", long.class, long.class);
+ }
+
+ @Override
Object execMathMethod() {
return longR = Math.subtractExact(long1, long2);
}
},
Multiply {
@Override
+ Executable testMethod() throws NoSuchMethodException, ClassNotFoundException {
+ return Class.forName("java.lang.Math").getDeclaredMethod("multiplyExact", long.class, long.class);
+ }
+
+ @Override
Object execMathMethod() {
return longR = Math.multiplyExact(long1, long2);
}
},
Increment {
@Override
+ Executable testMethod() throws NoSuchMethodException, ClassNotFoundException {
+ return Class.forName("java.lang.Math").getDeclaredMethod("incrementExact", long.class);
+ }
+
+ @Override
Object execMathMethod() {
return longR = Math.incrementExact(long1);
}
},
Decrement {
@Override
+ Executable testMethod() throws NoSuchMethodException, ClassNotFoundException {
+ return Class.forName("java.lang.Math").getDeclaredMethod("decrementExact", long.class);
+ }
+
+ @Override
Object execMathMethod() {
return longR = Math.decrementExact(long1);
}
},
Negate {
@Override
+ Executable testMethod() throws NoSuchMethodException, ClassNotFoundException {
+ return Class.forName("java.lang.Math").getDeclaredMethod("negateExact", long.class);
+ }
+
+ @Override
Object execMathMethod() {
return longR = Math.negateExact(long1);
}
@@ -131,8 +203,19 @@
protected long long2;
protected long longR;
+ abstract Executable testMethod() throws NoSuchMethodException, ClassNotFoundException;
abstract Object execMathMethod();
+ public Executable getTestMethod() {
+ try {
+ return testMethod();
+ } catch (NoSuchMethodException e) {
+ throw new RuntimeException("Test bug, no such method: " + e);
+ } catch (ClassNotFoundException e) {
+ throw new RuntimeException("Test bug, no such class: " + e);
+ }
+ }
+
@Override
public Executable getExecutable() {
try {