--- a/src/hotspot/cpu/aarch64/vtableStubs_aarch64.cpp Sat Sep 01 12:02:07 2018 -0700
+++ b/src/hotspot/cpu/aarch64/vtableStubs_aarch64.cpp Mon Sep 03 09:43:08 2018 +0200
@@ -44,24 +44,30 @@
#define __ masm->
#ifndef PRODUCT
-extern "C" void bad_compiled_vtable_index(JavaThread* thread,
- oop receiver,
- int index);
+extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index);
#endif
VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
- const int aarch64_code_length = VtableStub::pd_code_size_limit(true);
- VtableStub* s = new(aarch64_code_length) VtableStub(true, vtable_index);
+ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
+ const int stub_code_length = code_size_limit(true);
+ VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index);
// Can be NULL if there is no free space in the code cache.
if (s == NULL) {
return NULL;
}
- ResourceMark rm;
- CodeBuffer cb(s->entry_point(), aarch64_code_length);
+ // Count unused bytes in instruction sequences of variable size.
+ // We add them to the computed buffer size in order to avoid
+ // overflow in subsequently generated stubs.
+ address start_pc;
+ int slop_bytes = 0;
+ int slop_delta = 0;
+
+ ResourceMark rm;
+ CodeBuffer cb(s->entry_point(), stub_code_length);
MacroAssembler* masm = new MacroAssembler(&cb);
-#ifndef PRODUCT
+#if (!defined(PRODUCT) && defined(COMPILER2))
if (CountCompiledCalls) {
__ lea(r16, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
__ incrementw(Address(r16));
@@ -78,21 +84,35 @@
#ifndef PRODUCT
if (DebugVtables) {
Label L;
+ // TODO: find upper bound for this debug code.
+ start_pc = __ pc();
+
// check offset vs vtable length
__ ldrw(rscratch1, Address(r16, Klass::vtable_length_offset()));
__ cmpw(rscratch1, vtable_index * vtableEntry::size());
__ br(Assembler::GT, L);
__ enter();
__ mov(r2, vtable_index);
- __ call_VM(noreg,
- CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, r2);
+
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, r2);
+ const ptrdiff_t estimate = 256;
+ const ptrdiff_t codesize = __ pc() - start_pc;
+ slop_delta = estimate - codesize; // call_VM varies in length, depending on data
+ slop_bytes += slop_delta;
+ assert(slop_delta >= 0, "vtable #%d: Code size estimate (%d) for DebugVtables too small, required: %d", vtable_index, (int)estimate, (int)codesize);
+
__ leave();
__ bind(L);
}
#endif // PRODUCT
+ start_pc = __ pc();
__ lookup_virtual_method(r16, vtable_index, rmethod);
+ slop_delta = 8 - (int)(__ pc() - start_pc);
+ slop_bytes += slop_delta;
+ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
+#ifndef PRODUCT
if (DebugVtables) {
Label L;
__ cbz(rmethod, L);
@@ -101,6 +121,8 @@
__ stop("Vtable entry is NULL");
__ bind(L);
}
+#endif // PRODUCT
+
// r0: receiver klass
// rmethod: Method*
// r2: receiver
@@ -108,43 +130,46 @@
__ ldr(rscratch1, Address(rmethod, Method::from_compiled_offset()));
__ br(rscratch1);
- __ flush();
+ masm->flush();
+ bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, 0);
- if (PrintMiscellaneous && (WizardMode || Verbose)) {
- tty->print_cr("vtable #%d at " PTR_FORMAT "[%d] left over: %d",
- vtable_index, p2i(s->entry_point()),
- (int)(s->code_end() - s->entry_point()),
- (int)(s->code_end() - __ pc()));
- }
- guarantee(__ pc() <= s->code_end(), "overflowed buffer");
-
- s->set_exception_points(npe_addr, ame_addr);
return s;
}
VtableStub* VtableStubs::create_itable_stub(int itable_index) {
- // Note well: pd_code_size_limit is the absolute minimum we can get
- // away with. If you add code here, bump the code stub size
- // returned by pd_code_size_limit!
- const int code_length = VtableStub::pd_code_size_limit(false);
- VtableStub* s = new(code_length) VtableStub(false, itable_index);
- ResourceMark rm;
- CodeBuffer cb(s->entry_point(), code_length);
+ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
+ const int stub_code_length = code_size_limit(false);
+ VtableStub* s = new(stub_code_length) VtableStub(false, itable_index);
+ // Can be NULL if there is no free space in the code cache.
+ if (s == NULL) {
+ return NULL;
+ }
+ // Count unused bytes in instruction sequences of variable size.
+ // We add them to the computed buffer size in order to avoid
+ // overflow in subsequently generated stubs.
+ address start_pc;
+ int slop_bytes = 0;
+ int slop_delta = 0;
+
+ ResourceMark rm;
+ CodeBuffer cb(s->entry_point(), stub_code_length);
MacroAssembler* masm = new MacroAssembler(&cb);
-#ifndef PRODUCT
+#if (!defined(PRODUCT) && defined(COMPILER2))
if (CountCompiledCalls) {
__ lea(r10, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
__ incrementw(Address(r10));
}
#endif
+ // get receiver (need to skip return address on top of stack)
+ assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0");
+
// Entry arguments:
// rscratch2: CompiledICHolder
// j_rarg0: Receiver
-
// Most registers are in use; we'll use r16, rmethod, r10, r11
const Register recv_klass_reg = r10;
const Register holder_klass_reg = r16; // declaring interface klass (DECC)
@@ -157,8 +182,8 @@
__ ldr(resolved_klass_reg, Address(icholder_reg, CompiledICHolder::holder_klass_offset()));
__ ldr(holder_klass_reg, Address(icholder_reg, CompiledICHolder::holder_metadata_offset()));
- // get receiver (need to skip return address on top of stack)
- assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0");
+ start_pc = __ pc();
+
// get receiver klass (also an implicit null-check)
address npe_addr = __ pc();
__ load_klass(recv_klass_reg, j_rarg0);
@@ -172,16 +197,25 @@
L_no_such_interface,
/*return_method=*/false);
+ const ptrdiff_t typecheckSize = __ pc() - start_pc;
+ start_pc = __ pc();
+
// Get selected method from declaring class and itable index
__ load_klass(recv_klass_reg, j_rarg0); // restore recv_klass_reg
__ lookup_interface_method(// inputs: rec. class, interface, itable index
- recv_klass_reg, holder_klass_reg, itable_index,
- // outputs: method, scan temp. reg
- rmethod, temp_reg,
- L_no_such_interface);
+ recv_klass_reg, holder_klass_reg, itable_index,
+ // outputs: method, scan temp. reg
+ rmethod, temp_reg,
+ L_no_such_interface);
+
+ const ptrdiff_t lookupSize = __ pc() - start_pc;
- // method (rmethod): Method*
- // j_rarg0: receiver
+ // Reduce "estimate" such that "padding" does not drop below 8.
+ const ptrdiff_t estimate = 152;
+ const ptrdiff_t codesize = typecheckSize + lookupSize;
+ slop_delta = (int)(estimate - codesize);
+ slop_bytes += slop_delta;
+ assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize);
#ifdef ASSERT
if (DebugVtables) {
@@ -206,92 +240,17 @@
// We force resolving of the call site by jumping to the "handle
// wrong method" stub, and so let the interpreter runtime do all the
// dirty work.
+ assert(SharedRuntime::get_handle_wrong_method_stub() != NULL, "check initialization order");
__ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
- __ flush();
+ masm->flush();
+ bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0);
- if (PrintMiscellaneous && (WizardMode || Verbose)) {
- tty->print_cr("itable #%d at " PTR_FORMAT "[%d] left over: %d",
- itable_index, p2i(s->entry_point()),
- (int)(s->code_end() - s->entry_point()),
- (int)(s->code_end() - __ pc()));
- }
- guarantee(__ pc() <= s->code_end(), "overflowed buffer");
-
- s->set_exception_points(npe_addr, ame_addr);
return s;
}
-
-int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
- int size = DebugVtables ? 216 : 0;
- if (CountCompiledCalls)
- size += 6 * 4;
- // FIXME: vtable stubs only need 36 bytes
- if (is_vtable_stub)
- size += 52;
- else
- size += 176;
- return size;
-
- // In order to tune these parameters, run the JVM with VM options
- // +PrintMiscellaneous and +WizardMode to see information about
- // actual itable stubs. Run it with -Xmx31G -XX:+UseCompressedOops.
- //
- // If Universe::narrow_klass_base is nonzero, decoding a compressed
- // class can take zeveral instructions.
- //
- // The JVM98 app. _202_jess has a megamorphic interface call.
- // The itable code looks like this:
-
- // ldr xmethod, [xscratch2,#CompiledICHolder::holder_klass_offset]
- // ldr x0, [xscratch2]
- // ldr w10, [x1,#oopDesc::klass_offset_in_bytes]
- // mov xheapbase, #0x3c000000 // #narrow_klass_base
- // movk xheapbase, #0x3f7, lsl #32
- // add x10, xheapbase, x10
- // mov xheapbase, #0xe7ff0000 // #heapbase
- // movk xheapbase, #0x3f7, lsl #32
- // ldr w11, [x10,#vtable_length_offset]
- // add x11, x10, x11, uxtx #3
- // add x11, x11, #itableMethodEntry::method_offset_in_bytes
- // ldr x10, [x11]
- // cmp xmethod, x10
- // b.eq found_method
- // search:
- // cbz x10, no_such_interface
- // add x11, x11, #0x10
- // ldr x10, [x11]
- // cmp xmethod, x10
- // b.ne search
- // found_method:
- // ldr w10, [x1,#oopDesc::klass_offset_in_bytes]
- // mov xheapbase, #0x3c000000 // #narrow_klass_base
- // movk xheapbase, #0x3f7, lsl #32
- // add x10, xheapbase, x10
- // mov xheapbase, #0xe7ff0000 // #heapbase
- // movk xheapbase, #0x3f7, lsl #32
- // ldr w11, [x10,#vtable_length_offset]
- // add x11, x10, x11, uxtx #3
- // add x11, x11, #itableMethodEntry::method_offset_in_bytes
- // add x10, x10, #itentry_off
- // ldr xmethod, [x11]
- // cmp x0, xmethod
- // b.eq found_method2
- // search2:
- // cbz xmethod, 0x000003ffa872e6cc
- // add x11, x11, #0x10
- // ldr xmethod, [x11]
- // cmp x0, xmethod
- // b.ne search2
- // found_method2:
- // ldr w11, [x11,#itableOffsetEntry::offset_offset_in_bytes]
- // ldr xmethod, [x10,w11,uxtw]
- // ldr xscratch1, [xmethod,#Method::from_compiled_offset]
- // br xscratch1
- // no_such_interface:
- // b throw_ICCE_entry
-
+int VtableStub::pd_code_alignment() {
+ // aarch64 cache line size is not an architected constant. We just align on 4 bytes (instruction size).
+ const unsigned int icache_line_size = 4;
+ return icache_line_size;
}
-
-int VtableStub::pd_code_alignment() { return 4; }
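
The same slop accounting recurs in every port touched by this patch: record the current pc, emit a variable-size sequence, subtract the actual size from a fixed estimate, assert the estimate was large enough, and accumulate the leftover bytes. A minimal standalone sketch of that pattern follows; the toy assembler and the byte counts are illustrative stand-ins, not HotSpot code.

#include <cassert>
#include <cstdio>

// Stand-in for a MacroAssembler; it only tracks how many bytes were emitted.
struct ToyAssembler {
  unsigned char buf[512];
  int pos = 0;
  unsigned char* pc() { return buf + pos; }
  void emit(int nbytes) { pos += nbytes; }  // pretend to emit an instruction sequence
};

int main() {
  ToyAssembler masm;
  int slop_bytes = 0;  // unused bytes accumulated over all variable-size sequences

  // Sequence #1: estimated at 8 bytes, actually needs 4 (cf. lookup_virtual_method above).
  unsigned char* start_pc = masm.pc();
  masm.emit(4);
  int slop_delta = 8 - (int)(masm.pc() - start_pc);
  assert(slop_delta >= 0 && "negative slop, adjust the code size estimate");
  slop_bytes += slop_delta;

  // Sequence #2: estimated at 256 bytes, actually needs 180 (cf. the DebugVtables call_VM above).
  start_pc = masm.pc();
  masm.emit(180);
  slop_delta = 256 - (int)(masm.pc() - start_pc);
  assert(slop_delta >= 0 && "negative slop, adjust the code size estimate");
  slop_bytes += slop_delta;

  // The total is handed to the shared bookkeeping so later stubs can rely on a
  // size estimate that already accounts for this unused space.
  printf("emitted %d bytes, accumulated %d bytes of slop\n", masm.pos, slop_bytes);
  return 0;
}
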
--- a/src/hotspot/cpu/arm/vtableStubs_arm.cpp Sat Sep 01 12:02:07 2018 -0700
+++ b/src/hotspot/cpu/arm/vtableStubs_arm.cpp Mon Sep 03 09:43:08 2018 +0200
@@ -48,17 +48,31 @@
#endif
VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
- const int code_length = VtableStub::pd_code_size_limit(true);
- VtableStub* s = new(code_length) VtableStub(true, vtable_index);
+ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
+ const int stub_code_length = code_size_limit(true);
+ VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index);
// Can be NULL if there is no free space in the code cache.
if (s == NULL) {
return NULL;
}
- ResourceMark rm;
- CodeBuffer cb(s->entry_point(), code_length);
+ // Count unused bytes in instruction sequences of variable size.
+ // We add them to the computed buffer size in order to avoid
+ // overflow in subsequently generated stubs.
+ address start_pc;
+ int slop_bytes = 0;
+ int slop_delta = 0;
+
+ ResourceMark rm;
+ CodeBuffer cb(s->entry_point(), stub_code_length);
MacroAssembler* masm = new MacroAssembler(&cb);
+#if (!defined(PRODUCT) && defined(COMPILER2))
+ if (CountCompiledCalls) {
+ // Implementation required?
+ }
+#endif
+
assert(VtableStub::receiver_location() == R0->as_VMReg(), "receiver expected in R0");
const Register tmp = Rtemp; // Rtemp OK, should be free at call sites
@@ -66,17 +80,33 @@
address npe_addr = __ pc();
__ load_klass(tmp, R0);
- {
- int entry_offset = in_bytes(Klass::vtable_start_offset()) + vtable_index * vtableEntry::size_in_bytes();
- int method_offset = vtableEntry::method_offset_in_bytes() + entry_offset;
+#ifndef PRODUCT
+ if (DebugVtables) {
+ // Implementation required?
+ }
+#endif
+
+ start_pc = __ pc();
+ { // lookup virtual method
+ int entry_offset = in_bytes(Klass::vtable_start_offset()) + vtable_index * vtableEntry::size_in_bytes();
+ int method_offset = vtableEntry::method_offset_in_bytes() + entry_offset;
- assert ((method_offset & (wordSize - 1)) == 0, "offset should be aligned");
- int offset_mask = AARCH64_ONLY(0xfff << LogBytesPerWord) NOT_AARCH64(0xfff);
- if (method_offset & ~offset_mask) {
- __ add(tmp, tmp, method_offset & ~offset_mask);
+ assert ((method_offset & (wordSize - 1)) == 0, "offset should be aligned");
+ int offset_mask = AARCH64_ONLY(0xfff << LogBytesPerWord) NOT_AARCH64(0xfff);
+ if (method_offset & ~offset_mask) {
+ __ add(tmp, tmp, method_offset & ~offset_mask);
+ }
+ __ ldr(Rmethod, Address(tmp, method_offset & offset_mask));
}
- __ ldr(Rmethod, Address(tmp, method_offset & offset_mask));
+ slop_delta = 8 - (int)(__ pc() - start_pc);
+ slop_bytes += slop_delta;
+ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
+
+#ifndef PRODUCT
+ if (DebugVtables) {
+ // Implementation required?
}
+#endif
address ame_addr = __ pc();
#ifdef AARCH64
@@ -87,35 +117,36 @@
#endif // AARCH64
masm->flush();
+ bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, 0);
- if (PrintMiscellaneous && (WizardMode || Verbose)) {
- tty->print_cr("vtable #%d at " PTR_FORMAT "[%d] left over: %d",
- vtable_index, p2i(s->entry_point()),
- (int)(s->code_end() - s->entry_point()),
- (int)(s->code_end() - __ pc()));
- }
- guarantee(__ pc() <= s->code_end(), "overflowed buffer");
- // FIXME ARM: need correct 'slop' - below is x86 code
- // shut the door on sizing bugs
- //int slop = 8; // 32-bit offset is this much larger than a 13-bit one
- //assert(vtable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset");
-
- s->set_exception_points(npe_addr, ame_addr);
return s;
}
VtableStub* VtableStubs::create_itable_stub(int itable_index) {
- const int code_length = VtableStub::pd_code_size_limit(false);
- VtableStub* s = new(code_length) VtableStub(false, itable_index);
+ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
+ const int stub_code_length = code_size_limit(false);
+ VtableStub* s = new(stub_code_length) VtableStub(false, itable_index);
// Can be NULL if there is no free space in the code cache.
if (s == NULL) {
return NULL;
}
+ // Count unused bytes in instruction sequences of variable size.
+ // We add them to the computed buffer size in order to avoid
+ // overflow in subsequently generated stubs.
+ address start_pc;
+ int slop_bytes = 0;
+ int slop_delta = 0;
- ResourceMark rm;
- CodeBuffer cb(s->entry_point(), code_length);
+ ResourceMark rm;
+ CodeBuffer cb(s->entry_point(), stub_code_length);
MacroAssembler* masm = new MacroAssembler(&cb);
+#if (!defined(PRODUCT) && defined(COMPILER2))
+ if (CountCompiledCalls) {
+ // Implementation required?
+ }
+#endif
+
assert(VtableStub::receiver_location() == R0->as_VMReg(), "receiver expected in R0");
// R0-R3 / R0-R7 registers hold the arguments and cannot be spoiled
@@ -123,15 +154,16 @@
const Register Rintf = AARCH64_ONLY(R10) NOT_AARCH64(R5);
const Register Rscan = AARCH64_ONLY(R11) NOT_AARCH64(R6);
+ Label L_no_such_interface;
+
assert_different_registers(Ricklass, Rclass, Rintf, Rscan, Rtemp);
- // Calculate the start of itable (itable goes after vtable)
- const int scale = exact_log2(vtableEntry::size_in_bytes());
+ start_pc = __ pc();
+
+ // get receiver klass (also an implicit null-check)
address npe_addr = __ pc();
__ load_klass(Rclass, R0);
- Label L_no_such_interface;
-
// Receiver subtype check against REFC.
__ ldr(Rintf, Address(Ricklass, CompiledICHolder::holder_klass_offset()));
__ lookup_interface_method(// inputs: rec. class, interface, itable index
@@ -140,6 +172,9 @@
noreg, Rscan, Rtemp,
L_no_such_interface);
+ const ptrdiff_t typecheckSize = __ pc() - start_pc;
+ start_pc = __ pc();
+
// Get Method* and entry point for compiler
__ ldr(Rintf, Address(Ricklass, CompiledICHolder::holder_metadata_offset()));
__ lookup_interface_method(// inputs: rec. class, interface, itable index
@@ -148,6 +183,21 @@
Rmethod, Rscan, Rtemp,
L_no_such_interface);
+ const ptrdiff_t lookupSize = __ pc() - start_pc;
+
+ // Reduce "estimate" such that "padding" does not drop below 8.
+ const ptrdiff_t estimate = 140;
+ const ptrdiff_t codesize = typecheckSize + lookupSize;
+ slop_delta = (int)(estimate - codesize);
+ slop_bytes += slop_delta;
+ assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize);
+
+#ifndef PRODUCT
+ if (DebugVtables) {
+ // Implementation required?
+ }
+#endif
+
address ame_addr = __ pc();
#ifdef AARCH64
@@ -158,7 +208,6 @@
#endif // AARCH64
__ bind(L_no_such_interface);
-
// Handle IncompatibleClassChangeError in itable stubs.
// More detailed error message.
// We force resolving of the call site by jumping to the "handle
@@ -168,43 +217,13 @@
__ jump(SharedRuntime::get_handle_wrong_method_stub(), relocInfo::runtime_call_type, Rtemp);
masm->flush();
+ bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0);
- if (PrintMiscellaneous && (WizardMode || Verbose)) {
- tty->print_cr("itable #%d at " PTR_FORMAT "[%d] left over: %d",
- itable_index, p2i(s->entry_point()),
- (int)(s->code_end() - s->entry_point()),
- (int)(s->code_end() - __ pc()));
- }
- guarantee(__ pc() <= s->code_end(), "overflowed buffer");
- // FIXME ARM: need correct 'slop' - below is x86 code
- // shut the door on sizing bugs
- //int slop = 8; // 32-bit offset is this much larger than a 13-bit one
- //assert(itable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset");
-
- s->set_exception_points(npe_addr, ame_addr);
return s;
}
-int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
- int instr_count;
-
- if (is_vtable_stub) {
- // vtable stub size
- instr_count = NOT_AARCH64(4) AARCH64_ONLY(5);
- } else {
- // itable stub size
- instr_count = NOT_AARCH64(31) AARCH64_ONLY(31);
- }
-
-#ifdef AARCH64
- if (UseCompressedClassPointers) {
- instr_count += MacroAssembler::instr_count_for_decode_klass_not_null();
- }
-#endif // AARCH64
-
- return instr_count * Assembler::InstructionSize;
+int VtableStub::pd_code_alignment() {
+ // ARM32 cache line size is not an architected constant. We just align on word size.
+ const unsigned int icache_line_size = wordSize;
+ return icache_line_size;
}
-
-int VtableStub::pd_code_alignment() {
- return 8;
-}
--- a/src/hotspot/cpu/ppc/vtableStubs_ppc_64.cpp Sat Sep 01 12:02:07 2018 -0700
+++ b/src/hotspot/cpu/ppc/vtableStubs_ppc_64.cpp Mon Sep 03 09:43:08 2018 +0200
@@ -39,36 +39,39 @@
#define __ masm->
-#ifdef PRODUCT
-#define BLOCK_COMMENT(str) // nothing
-#else
-#define BLOCK_COMMENT(str) __ block_comment(str)
-#endif
-#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
-
#ifndef PRODUCT
extern "C" void bad_compiled_vtable_index(JavaThread* thread, oopDesc* receiver, int index);
#endif
-// Used by compiler only; may use only caller saved, non-argument
-// registers.
+// Used by compiler only; may use only caller saved, non-argument registers.
VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
- // PPC port: use fixed size.
- const int code_length = VtableStub::pd_code_size_limit(true);
- VtableStub* s = new (code_length) VtableStub(true, vtable_index);
-
+ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
+ const int stub_code_length = code_size_limit(true);
+ VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index);
// Can be NULL if there is no free space in the code cache.
if (s == NULL) {
return NULL;
}
- ResourceMark rm;
- CodeBuffer cb(s->entry_point(), code_length);
+ // Count unused bytes in instruction sequences of variable size.
+ // We add them to the computed buffer size in order to avoid
+ // overflow in subsequently generated stubs.
+ address start_pc;
+ int slop_bytes = 8; // just a two-instruction safety net
+ int slop_delta = 0;
+
+ ResourceMark rm;
+ CodeBuffer cb(s->entry_point(), stub_code_length);
MacroAssembler* masm = new MacroAssembler(&cb);
-#ifndef PRODUCT
+#if (!defined(PRODUCT) && defined(COMPILER2))
if (CountCompiledCalls) {
+ start_pc = __ pc();
+ int load_const_maxLen = 5*BytesPerInstWord; // load_const generates 5 instructions. Assume that as the worst-case size for load_const_optimized.
int offs = __ load_const_optimized(R11_scratch1, SharedRuntime::nof_megamorphic_calls_addr(), R12_scratch2, true);
+ slop_delta = load_const_maxLen - (__ pc() - start_pc);
+ slop_bytes += slop_delta;
+ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
__ lwz(R12_scratch2, offs, R11_scratch1);
__ addi(R12_scratch2, R12_scratch2, 1);
__ stw(R12_scratch2, offs, R11_scratch1);
@@ -77,17 +80,13 @@
assert(VtableStub::receiver_location() == R3_ARG1->as_VMReg(), "receiver expected in R3_ARG1");
- // Get receiver klass.
const Register rcvr_klass = R11_scratch1;
-
- // We might implicit NULL fault here.
address npe_addr = __ pc(); // npe = null pointer exception
+ // check if we must do an explicit check (implicit checks disabled, offset too large).
__ null_check(R3, oopDesc::klass_offset_in_bytes(), /*implicit only*/NULL);
+ // Get receiver klass.
__ load_klass(rcvr_klass, R3);
- // Set method (in case of interpreted method), and destination address.
- int entry_offset = in_bytes(Klass::vtable_start_offset()) + vtable_index*vtableEntry::size_in_bytes();
-
#ifndef PRODUCT
if (DebugVtables) {
Label L;
@@ -102,7 +101,9 @@
}
#endif
- int v_off = entry_offset + vtableEntry::method_offset_in_bytes();
+ int entry_offset = in_bytes(Klass::vtable_start_offset()) +
+ vtable_index*vtableEntry::size_in_bytes();
+ int v_off = entry_offset + vtableEntry::method_offset_in_bytes();
__ ld(R19_method, (RegisterOrConstant)v_off, rcvr_klass);
@@ -116,40 +117,48 @@
}
#endif
- // If the vtable entry is null, the method is abstract.
address ame_addr = __ pc(); // ame = abstract method error
+ // if the vtable entry is null, the method is abstract
+ // NOTE: for vtable dispatches, the vtable entry will never be null.
+
__ null_check(R19_method, in_bytes(Method::from_compiled_offset()), /*implicit only*/NULL);
__ ld(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method);
__ mtctr(R12_scratch2);
__ bctr();
masm->flush();
-
- guarantee(__ pc() <= s->code_end(), "overflowed buffer");
-
- s->set_exception_points(npe_addr, ame_addr);
+ bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, 0);
return s;
}
VtableStub* VtableStubs::create_itable_stub(int itable_index) {
- // PPC port: use fixed size.
- const int code_length = VtableStub::pd_code_size_limit(false);
- VtableStub* s = new (code_length) VtableStub(false, itable_index);
-
+ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
+ const int stub_code_length = code_size_limit(false);
+ VtableStub* s = new(stub_code_length) VtableStub(false, itable_index);
// Can be NULL if there is no free space in the code cache.
if (s == NULL) {
return NULL;
}
+ // Count unused bytes in instruction sequences of variable size.
+ // We add them to the computed buffer size in order to avoid
+ // overflow in subsequently generated stubs.
+ address start_pc;
+ int slop_bytes = 8; // just a two-instruction safety net
+ int slop_delta = 0;
- ResourceMark rm;
- CodeBuffer cb(s->entry_point(), code_length);
+ ResourceMark rm;
+ CodeBuffer cb(s->entry_point(), stub_code_length);
MacroAssembler* masm = new MacroAssembler(&cb);
- address start_pc;
+ int load_const_maxLen = 5*BytesPerInstWord; // load_const generates 5 instructions. Assume that as the worst-case size for load_const_optimized.
-#ifndef PRODUCT
+#if (!defined(PRODUCT) && defined(COMPILER2))
if (CountCompiledCalls) {
+ start_pc = __ pc();
int offs = __ load_const_optimized(R11_scratch1, SharedRuntime::nof_megamorphic_calls_addr(), R12_scratch2, true);
+ slop_delta = load_const_maxLen - (__ pc() - start_pc);
+ slop_bytes += slop_delta;
+ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
__ lwz(R12_scratch2, offs, R11_scratch1);
__ addi(R12_scratch2, R12_scratch2, 1);
__ stw(R12_scratch2, offs, R11_scratch1);
@@ -209,33 +218,22 @@
// wrong method" stub, and so let the interpreter runtime do all the
// dirty work.
__ bind(L_no_such_interface);
+ start_pc = __ pc();
__ load_const_optimized(R11_scratch1, SharedRuntime::get_handle_wrong_method_stub(), R12_scratch2);
+ slop_delta = load_const_maxLen - (__ pc() - start_pc);
+ slop_bytes += slop_delta;
+ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
__ mtctr(R11_scratch1);
__ bctr();
masm->flush();
+ bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0);
- guarantee(__ pc() <= s->code_end(), "overflowed buffer");
-
- s->set_exception_points(npe_addr, ame_addr);
return s;
}
-int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
- if (DebugVtables || CountCompiledCalls || VerifyOops) {
- return 1000;
- }
- int size = is_vtable_stub ? 20 + 8 : 164 + 20; // Plain + safety
- if (UseCompressedClassPointers) {
- size += MacroAssembler::instr_size_for_decode_klass_not_null();
- }
- if (!ImplicitNullChecks || !os::zero_page_read_protected()) {
- size += is_vtable_stub ? 8 : 12;
- }
- return size;
-}
-
int VtableStub::pd_code_alignment() {
+ // Power cache line size is 128 bytes, but we want to limit alignment loss.
const unsigned int icache_line_size = 32;
return icache_line_size;
}
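
On PPC (and on s390 below) the variable-size piece is the constant load: load_const always occupies five instructions, while load_const_optimized may emit fewer, and the difference is booked as slop against the worst case. The sketch below illustrates that reserve-worst-case / emit-optimized / count-the-rest idea with an invented toy encoder in place of the real emitter.

#include <cassert>
#include <cstdint>
#include <cstdio>

const int BytesPerInstWord = 4;

// Toy stand-in for load_const_optimized: a value that fits in 16 bits takes one
// "instruction", anything larger needs the full 5-instruction sequence.
int emit_load_const_optimized(int64_t value) {
  return (value >= -32768 && value < 32768) ? 1 * BytesPerInstWord
                                            : 5 * BytesPerInstWord;
}

int main() {
  const int load_const_maxLen = 5 * BytesPerInstWord;  // architectural worst case
  int slop_bytes = 8;                                  // two-instruction safety net, as above
  int64_t values[] = { 42, INT64_C(0x123456789) };

  for (int64_t value : values) {
    int emitted    = emit_load_const_optimized(value);
    int slop_delta = load_const_maxLen - emitted;
    assert(slop_delta >= 0 && "negative slop, adjust the code size estimate");
    slop_bytes += slop_delta;
    printf("constant %lld: emitted %d bytes, slop %d\n", (long long)value, emitted, slop_delta);
  }
  printf("total slop: %d bytes\n", slop_bytes);
  return 0;
}
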
--- a/src/hotspot/cpu/s390/vtableStubs_s390.cpp Sat Sep 01 12:02:07 2018 -0700
+++ b/src/hotspot/cpu/s390/vtableStubs_s390.cpp Mon Sep 03 09:43:08 2018 +0200
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2016, 2017 SAP SE. All rights reserved.
+ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2018 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -37,9 +37,6 @@
#include "opto/runtime.hpp"
#endif
-// Machine-dependent part of VtableStubs: create vtableStub of correct
-// size and initialize its code.
-
#define __ masm->
#ifndef PRODUCT
@@ -48,123 +45,140 @@
// Used by compiler only; may use only caller saved, non-argument registers.
VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
-
- const int code_length = VtableStub::pd_code_size_limit(true);
- VtableStub *s = new(code_length) VtableStub(true, vtable_index);
- if (s == NULL) { // Indicates OOM In the code cache.
+ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
+ const int stub_code_length = code_size_limit(true);
+ VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index);
+ // Can be NULL if there is no free space in the code cache.
+ if (s == NULL) {
return NULL;
}
+ // Count unused bytes in instruction sequences of variable size.
+ // We add them to the computed buffer size in order to avoid
+ // overflow in subsequently generated stubs.
+ address start_pc;
+ int slop_bytes = 0;
+ int slop_delta = 0;
+
ResourceMark rm;
- CodeBuffer cb(s->entry_point(), code_length);
- MacroAssembler *masm = new MacroAssembler(&cb);
- int padding_bytes = 0;
+ CodeBuffer cb(s->entry_point(), stub_code_length);
+ MacroAssembler* masm = new MacroAssembler(&cb);
#if (!defined(PRODUCT) && defined(COMPILER2))
if (CountCompiledCalls) {
- // Count unused bytes
- // worst case actual size
- padding_bytes += __ load_const_size() - __ load_const_optimized_rtn_len(Z_R1_scratch, (long)SharedRuntime::nof_megamorphic_calls_addr(), true);
-
+ // worst case actual size
+ slop_delta = __ load_const_size() - __ load_const_optimized_rtn_len(Z_R1_scratch, (long)SharedRuntime::nof_megamorphic_calls_addr(), true);
+ slop_bytes += slop_delta;
+ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
// Use generic emitter for direct memory increment.
// Abuse Z_method as scratch register for generic emitter.
// It is loaded further down anyway before it is first used.
+ // No dynamic code size variance here, increment is 1, always.
__ add2mem_32(Address(Z_R1_scratch), 1, Z_method);
}
#endif
assert(VtableStub::receiver_location() == Z_R2->as_VMReg(), "receiver expected in Z_ARG1");
- // Get receiver klass.
- // Must do an explicit check if implicit checks are disabled.
- address npe_addr = __ pc(); // npe == NULL ptr exception
+ const Register rcvr_klass = Z_R1_scratch;
+ address npe_addr = __ pc(); // npe == NULL ptr exception
+ // check if we must do an explicit check (implicit checks disabled, offset too large).
__ null_check(Z_ARG1, Z_R1_scratch, oopDesc::klass_offset_in_bytes());
- const Register rcvr_klass = Z_R1_scratch;
+ // Get receiver klass.
__ load_klass(rcvr_klass, Z_ARG1);
- // Set method (in case of interpreted method), and destination address.
- int entry_offset = in_bytes(Klass::vtable_start_offset()) +
- vtable_index * vtableEntry::size_in_bytes();
-
#ifndef PRODUCT
if (DebugVtables) {
- Label L;
+ NearLabel L;
// Check offset vs vtable length.
const Register vtable_idx = Z_R0_scratch;
- // Count unused bytes.
- // worst case actual size
- padding_bytes += __ load_const_size() - __ load_const_optimized_rtn_len(vtable_idx, vtable_index*vtableEntry::size_in_bytes(), true);
+ // worst case actual size
+ slop_delta = __ load_const_size() - __ load_const_optimized_rtn_len(vtable_idx, vtable_index*vtableEntry::size(), true);
+ slop_bytes += slop_delta;
+ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
- assert(Immediate::is_uimm12(in_bytes(Klass::vtable_length_offset())), "disp to large");
+ assert(Displacement::is_shortDisp(in_bytes(Klass::vtable_length_offset())), "disp too large");
__ z_cl(vtable_idx, in_bytes(Klass::vtable_length_offset()), rcvr_klass);
__ z_brl(L);
__ z_lghi(Z_ARG3, vtable_index); // Debug code, don't optimize.
__ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), Z_ARG1, Z_ARG3, false);
// Count unused bytes (assume worst case here).
- padding_bytes += 12;
+ slop_bytes += 12;
__ bind(L);
}
#endif
- int v_off = entry_offset + vtableEntry::method_offset_in_bytes();
+ int entry_offset = in_bytes(Klass::vtable_start_offset()) +
+ vtable_index * vtableEntry::size_in_bytes();
+ int v_off = entry_offset + vtableEntry::method_offset_in_bytes();
+ // Set method (in case of interpreted method), and destination address.
// Duplicate safety code from enc_class Java_Dynamic_Call_dynTOC.
if (Displacement::is_validDisp(v_off)) {
__ z_lg(Z_method/*method oop*/, v_off, rcvr_klass/*class oop*/);
// Account for the load_const in the else path.
- padding_bytes += __ load_const_size();
+ slop_delta = __ load_const_size();
} else {
// Worse case, offset does not fit in displacement field.
- __ load_const(Z_method, v_off); // Z_method temporarily holds the offset value.
+ // worst case actual size
+ slop_delta = __ load_const_size() - __ load_const_optimized_rtn_len(Z_method, v_off, true);
__ z_lg(Z_method/*method oop*/, 0, Z_method/*method offset*/, rcvr_klass/*class oop*/);
}
+ slop_bytes += slop_delta;
#ifndef PRODUCT
if (DebugVtables) {
- Label L;
+ NearLabel L;
__ z_ltgr(Z_method, Z_method);
__ z_brne(L);
- __ stop("Vtable entry is ZERO",102);
+ __ stop("Vtable entry is ZERO", 102);
__ bind(L);
}
#endif
- address ame_addr = __ pc(); // ame = abstract method error
-
- // Must do an explicit check if implicit checks are disabled.
+ // Must do an explicit check if offset too large or implicit checks are disabled.
+ address ame_addr = __ pc();
__ null_check(Z_method, Z_R1_scratch, in_bytes(Method::from_compiled_offset()));
__ z_lg(Z_R1_scratch, in_bytes(Method::from_compiled_offset()), Z_method);
__ z_br(Z_R1_scratch);
masm->flush();
-
- s->set_exception_points(npe_addr, ame_addr);
+ bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, 0);
return s;
}
VtableStub* VtableStubs::create_itable_stub(int itable_index) {
- const int code_length = VtableStub::pd_code_size_limit(false);
- VtableStub *s = new(code_length) VtableStub(false, itable_index);
- if (s == NULL) { // Indicates OOM in the code cache.
+ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
+ const int stub_code_length = code_size_limit(false);
+ VtableStub* s = new(stub_code_length) VtableStub(false, itable_index);
+ // Can be NULL if there is no free space in the code cache.
+ if (s == NULL) {
return NULL;
}
+ // Count unused bytes in instruction sequences of variable size.
+ // We add them to the computed buffer size in order to avoid
+ // overflow in subsequently generated stubs.
+ address start_pc;
+ int slop_bytes = 0;
+ int slop_delta = 0;
ResourceMark rm;
- CodeBuffer cb(s->entry_point(), code_length);
- MacroAssembler *masm = new MacroAssembler(&cb);
- int padding_bytes = 0;
+ CodeBuffer cb(s->entry_point(), stub_code_length);
+ MacroAssembler* masm = new MacroAssembler(&cb);
#if (!defined(PRODUCT) && defined(COMPILER2))
if (CountCompiledCalls) {
- // Count unused bytes
- // worst case actual size
- padding_bytes += __ load_const_size() - __ load_const_optimized_rtn_len(Z_R1_scratch, (long)SharedRuntime::nof_megamorphic_calls_addr(), true);
-
+ // worst case actual size
+ slop_delta = __ load_const_size() - __ load_const_optimized_rtn_len(Z_R1_scratch, (long)SharedRuntime::nof_megamorphic_calls_addr(), true);
+ slop_bytes += slop_delta;
+ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
// Use generic emitter for direct memory increment.
- // Use Z_tmp_1 as scratch register for generic emitter.
- __ add2mem_32((Z_R1_scratch), 1, Z_tmp_1);
+ // Abuse Z_method as scratch register for generic emitter.
+ // It is loaded further down anyway before it is first used.
+ // No dynamic code size variance here, increment is 1, always.
+ __ add2mem_32(Address(Z_R1_scratch), 1, Z_method);
}
#endif
@@ -178,7 +192,7 @@
interface = Z_tmp_2;
// Get receiver klass.
- // Must do an explicit check if implicit checks are disabled.
+ // Must do an explicit check if offset too large or implicit checks are disabled.
address npe_addr = __ pc(); // npe == NULL ptr exception
__ null_check(Z_ARG1, Z_R1_scratch, oopDesc::klass_offset_in_bytes());
__ load_klass(rcvr_klass, Z_ARG1);
@@ -195,10 +209,10 @@
#ifndef PRODUCT
if (DebugVtables) {
- Label ok1;
+ NearLabel ok1;
__ z_ltgr(Z_method, Z_method);
__ z_brne(ok1);
- __ stop("method is null",103);
+ __ stop("method is null", 103);
__ bind(ok1);
}
#endif
@@ -213,39 +227,24 @@
// Handle IncompatibleClassChangeError in itable stubs.
__ bind(no_such_interface);
- // Count unused bytes
- // worst case actual size
- // We force resolving of the call site by jumping to
- // the "handle wrong method" stub, and so let the
+ // More detailed IncompatibleClassChangeError.
+ // We force re-resolving of the call site by jumping to
+ // the "handle wrong method" stub, thus letting the
// interpreter runtime do all the dirty work.
- padding_bytes += __ load_const_size() - __ load_const_optimized_rtn_len(Z_R1_scratch, (long)SharedRuntime::get_handle_wrong_method_stub(), true);
+ // worst case actual size
+ slop_delta = __ load_const_size() - __ load_const_optimized_rtn_len(Z_R1_scratch, (long)SharedRuntime::get_handle_wrong_method_stub(), true);
+ slop_bytes += slop_delta;
+ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
__ z_br(Z_R1_scratch);
masm->flush();
+ bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0);
- s->set_exception_points(npe_addr, ame_addr);
return s;
}
-// In order to tune these parameters, run the JVM with VM options
-// +PrintMiscellaneous and +WizardMode to see information about
-// actual itable stubs. Run it with -Xmx31G -XX:+UseCompressedOops.
-int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
- int size = DebugVtables ? 216 : 0;
- if (CountCompiledCalls) {
- size += 6 * 4;
- }
- size += is_vtable_stub ? 36 : 140;
- if (UseCompressedClassPointers) {
- size += MacroAssembler::instr_size_for_decode_klass_not_null();
- }
- if (!ImplicitNullChecks) {
- size += 36;
- }
- return size;
-}
-
int VtableStub::pd_code_alignment() {
+ // System z cache line size is 256 bytes, but octoword-alignment is quite ok.
const unsigned int icache_line_size = 32;
return icache_line_size;
}
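
Each port now only reports its preferred alignment through pd_code_alignment(); rounding the stub entry point up to that boundary happens in shared code. Below is a minimal sketch of such an align-up step, assuming a power-of-two alignment; the helper name and addresses are illustrative, not the shared VtableStubs implementation.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Round 'value' up to the next multiple of 'alignment' (alignment must be a power of two).
uintptr_t align_up(uintptr_t value, uintptr_t alignment) {
  assert((alignment & (alignment - 1)) == 0 && "alignment must be a power of two");
  return (value + alignment - 1) & ~(alignment - 1);
}

int main() {
  const uintptr_t alignment = 32;      // e.g. the s390 pd_code_alignment() value above
  uintptr_t raw_entry = 0x10014;       // some arbitrary, unaligned position in the stub area
  uintptr_t entry = align_up(raw_entry, alignment);
  printf("entry 0x%llx -> 0x%llx (alignment loss: %llu bytes)\n",
         (unsigned long long)raw_entry, (unsigned long long)entry,
         (unsigned long long)(entry - raw_entry));
  return 0;
}
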
--- a/src/hotspot/cpu/sparc/vtableStubs_sparc.cpp Sat Sep 01 12:02:07 2018 -0700
+++ b/src/hotspot/cpu/sparc/vtableStubs_sparc.cpp Mon Sep 03 09:43:08 2018 +0200
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -41,32 +41,38 @@
#define __ masm->
-
#ifndef PRODUCT
extern "C" void bad_compiled_vtable_index(JavaThread* thread, oopDesc* receiver, int index);
#endif
// Used by compiler only; may use only caller saved, non-argument registers
-// NOTE: %%%% if any change is made to this stub make sure that the function
-// pd_code_size_limit is changed to ensure the correct size for VtableStub
VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
- const int sparc_code_length = VtableStub::pd_code_size_limit(true);
- VtableStub* s = new(sparc_code_length) VtableStub(true, vtable_index);
+ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
+ const int stub_code_length = code_size_limit(true);
+ VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index);
// Can be NULL if there is no free space in the code cache.
if (s == NULL) {
return NULL;
}
- ResourceMark rm;
- CodeBuffer cb(s->entry_point(), sparc_code_length);
+ // Count unused bytes in instruction sequences of variable size.
+ // We add them to the computed buffer size in order to avoid
+ // overflow in subsequently generated stubs.
+ address start_pc;
+ int slop_bytes = 0;
+ int slop_delta = 0;
+ const int index_dependent_slop = ((vtable_index < 512) ? 2 : 0)*BytesPerInstWord; // code size change with transition from 13-bit to 32-bit constant (@index == 512?).
+
+ ResourceMark rm;
+ CodeBuffer cb(s->entry_point(), stub_code_length);
MacroAssembler* masm = new MacroAssembler(&cb);
-#ifndef PRODUCT
+#if (!defined(PRODUCT) && defined(COMPILER2))
if (CountCompiledCalls) {
__ inc_counter(SharedRuntime::nof_megamorphic_calls_addr(), G5, G3_scratch);
}
-#endif /* PRODUCT */
+#endif // PRODUCT
assert(VtableStub::receiver_location() == O0->as_VMReg(), "receiver expected in O0");
@@ -74,20 +80,33 @@
address npe_addr = __ pc();
__ load_klass(O0, G3_scratch);
- // set Method* (in case of interpreted method), and destination address
#ifndef PRODUCT
if (DebugVtables) {
Label L;
// check offset vs vtable length
__ ld(G3_scratch, in_bytes(Klass::vtable_length_offset()), G5);
__ cmp_and_br_short(G5, vtable_index*vtableEntry::size(), Assembler::greaterUnsigned, Assembler::pt, L);
+
+ // set generates 8 instructions (worst case), 1 instruction (best case)
+ start_pc = __ pc();
__ set(vtable_index, O2);
+ slop_delta = __ worst_case_insts_for_set()*BytesPerInstWord - (__ pc() - start_pc);
+ slop_bytes += slop_delta;
+ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
+
+ // there is no variance in call_VM() emitted code.
__ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), O0, O2);
__ bind(L);
}
#endif
+ // set Method* (in case of interpreted method), and destination address
+ start_pc = __ pc();
__ lookup_virtual_method(G3_scratch, vtable_index, G5_method);
+ // lookup_virtual_method generates 3 instructions (worst case), 1 instruction (best case)
+ slop_delta = 3*BytesPerInstWord - (int)(__ pc() - start_pc);
+ slop_bytes += slop_delta;
+ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
#ifndef PRODUCT
if (DebugVtables) {
@@ -109,37 +128,41 @@
__ delayed()->nop();
masm->flush();
+ slop_bytes += index_dependent_slop; // add'l slop for size variance due to large vtable offsets
+ bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, index_dependent_slop);
- if (PrintMiscellaneous && (WizardMode || Verbose)) {
- tty->print_cr("vtable #%d at " PTR_FORMAT "[%d] left over: %d",
- vtable_index, p2i(s->entry_point()),
- (int)(s->code_end() - s->entry_point()),
- (int)(s->code_end() - __ pc()));
- }
- guarantee(__ pc() <= s->code_end(), "overflowed buffer");
- // shut the door on sizing bugs
- int slop = 2*BytesPerInstWord; // 32-bit offset is this much larger than a 13-bit one
- assert(vtable_index > 10 || __ pc() + slop <= s->code_end(), "room for sethi;add");
-
- s->set_exception_points(npe_addr, ame_addr);
return s;
}
-// NOTE: %%%% if any change is made to this stub make sure that the function
-// pd_code_size_limit is changed to ensure the correct size for VtableStub
VtableStub* VtableStubs::create_itable_stub(int itable_index) {
- const int sparc_code_length = VtableStub::pd_code_size_limit(false);
- VtableStub* s = new(sparc_code_length) VtableStub(false, itable_index);
+ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
+ const int stub_code_length = code_size_limit(false);
+ VtableStub* s = new(stub_code_length) VtableStub(false, itable_index);
// Can be NULL if there is no free space in the code cache.
if (s == NULL) {
return NULL;
}
+ // Count unused bytes in instruction sequences of variable size.
+ // We add them to the computed buffer size in order to avoid
+ // overflow in subsequently generated stubs.
+ address start_pc;
+ int slop_bytes = 0;
+ int slop_delta = 0;
+ const int index_dependent_slop = ((itable_index < 512) ? 2 : 0)*BytesPerInstWord; // code size change with transition from 13-bit to 32-bit constant (@index == 512?).
- ResourceMark rm;
- CodeBuffer cb(s->entry_point(), sparc_code_length);
+ ResourceMark rm;
+ CodeBuffer cb(s->entry_point(), stub_code_length);
MacroAssembler* masm = new MacroAssembler(&cb);
+#if (!defined(PRODUCT) && defined(COMPILER2))
+ if (CountCompiledCalls) {
+ // Use G3_scratch, G4_scratch as work regs for inc_counter.
+ // These are defined before use further down.
+ __ inc_counter(SharedRuntime::nof_megamorphic_calls_addr(), G3_scratch, G4_scratch);
+ }
+#endif // PRODUCT
+
Register G3_Klass = G3_scratch;
Register G5_icholder = G5; // Passed in as an argument
Register G4_interface = G4_scratch;
@@ -160,15 +183,10 @@
// and so those registers are not available here.
__ save(SP,-frame::register_save_words*wordSize,SP);
-#ifndef PRODUCT
- if (CountCompiledCalls) {
- __ inc_counter(SharedRuntime::nof_megamorphic_calls_addr(), L0, L1);
- }
-#endif /* PRODUCT */
+ Label L_no_such_interface;
+ Register L5_method = L5;
- Label L_no_such_interface;
-
- Register L5_method = L5;
+ start_pc = __ pc();
// Receiver subtype check against REFC.
__ ld_ptr(G5_icholder, CompiledICHolder::holder_klass_offset(), G4_interface);
@@ -179,6 +197,9 @@
L_no_such_interface,
/*return_method=*/ false);
+ const ptrdiff_t typecheckSize = __ pc() - start_pc;
+ start_pc = __ pc();
+
// Get Method* and entrypoint for compiler
__ ld_ptr(G5_icholder, CompiledICHolder::holder_metadata_offset(), G4_interface);
__ lookup_interface_method(// inputs: rec. class, interface, itable index
@@ -187,6 +208,19 @@
L5_method, L2, L3,
L_no_such_interface);
+ const ptrdiff_t lookupSize = __ pc() - start_pc;
+
+ // Reduce "estimate" such that "padding" does not drop below 8.
+ // Do not target a left-over number of zero, because a very
+ // large vtable or itable offset (> 4K) will require an extra
+ // sethi/or pair of instructions.
+ // Found typecheck(60) + lookup(72) to exceed the previous estimate (32*4).
+ const ptrdiff_t estimate = 36*BytesPerInstWord;
+ const ptrdiff_t codesize = typecheckSize + lookupSize + index_dependent_slop;
+ slop_delta = (int)(estimate - codesize);
+ slop_bytes += slop_delta;
+ assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize);
+
#ifndef PRODUCT
if (DebugVtables) {
Label L01;
@@ -222,88 +256,12 @@
__ delayed()->restore();
masm->flush();
+ slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets
+ bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, index_dependent_slop);
- if (PrintMiscellaneous && (WizardMode || Verbose)) {
- tty->print_cr("itable #%d at " PTR_FORMAT "[%d] left over: %d",
- itable_index, p2i(s->entry_point()),
- (int)(s->code_end() - s->entry_point()),
- (int)(s->code_end() - __ pc()));
- }
- guarantee(__ pc() <= s->code_end(), "overflowed buffer");
- // shut the door on sizing bugs
- int slop = 2*BytesPerInstWord; // 32-bit offset is this much larger than a 13-bit one
- assert(itable_index > 10 || __ pc() + slop <= s->code_end(), "room for sethi;add");
-
- s->set_exception_points(npe_addr, ame_addr);
return s;
}
-
-int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
- if (DebugVtables || CountCompiledCalls || VerifyOops) return 1000;
- else {
- const int slop = 2*BytesPerInstWord; // sethi;add (needed for long offsets)
- if (is_vtable_stub) {
- // ld;ld;ld,jmp,nop
- const int basic = 5*BytesPerInstWord +
- // shift;add for load_klass (only shift with zero heap based)
- (UseCompressedClassPointers ?
- MacroAssembler::instr_size_for_decode_klass_not_null() : 0);
- return basic + slop;
- } else {
- const int basic = 54 * BytesPerInstWord +
- // shift;add for load_klass (only shift with zero heap based)
- (UseCompressedClassPointers ?
- MacroAssembler::instr_size_for_decode_klass_not_null() : 0);
- return (basic + slop);
- }
- }
-
- // In order to tune these parameters, run the JVM with VM options
- // +PrintMiscellaneous and +WizardMode to see information about
- // actual itable stubs. Look for lines like this:
- // itable #1 at 0x5551212[116] left over: 8
- // Reduce the constants so that the "left over" number is 8
- // Do not aim at a left-over number of zero, because a very
- // large vtable or itable offset (> 4K) will require an extra
- // sethi/or pair of instructions.
- //
- // The JVM98 app. _202_jess has a megamorphic interface call.
- // The itable code looks like this:
- // Decoding VtableStub itbl[1]@16
- // ld [ %o0 + 4 ], %g3
- // save %sp, -64, %sp
- // ld [ %g3 + 0xe8 ], %l2
- // sll %l2, 2, %l2
- // add %l2, 0x134, %l2
- // add %g3, %l2, %l2
- // add %g3, 4, %g3
- // ld [ %l2 ], %l5
- // brz,pn %l5, throw_icce
- // cmp %l5, %g5
- // be %icc, success
- // add %l2, 8, %l2
- // loop:
- // ld [ %l2 ], %l5
- // brz,pn %l5, throw_icce
- // cmp %l5, %g5
- // bne,pn %icc, loop
- // add %l2, 8, %l2
- // success:
- // ld [ %l2 + -4 ], %l2
- // ld [ %g3 + %l2 ], %l5
- // restore %l5, 0, %g5
- // ld [ %g5 + 0x44 ], %g3
- // jmp %g3
- // nop
- // throw_icce:
- // sethi %hi(throw_ICCE_entry), %g3
- // ! 5 more instructions here, LP64_ONLY
- // jmp %g3 + %lo(throw_ICCE_entry)
- // restore
-}
-
-
int VtableStub::pd_code_alignment() {
// UltraSPARC cache line size is 8 instructions:
const unsigned int icache_line_size = 32;
--- a/src/hotspot/cpu/x86/vtableStubs_x86_32.cpp Sat Sep 01 12:02:07 2018 -0700
+++ b/src/hotspot/cpu/x86/vtableStubs_x86_32.cpp Mon Sep 03 09:43:08 2018 +0200
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -55,25 +55,34 @@
// Available now, but may become callee-save at some point:
// rsi, rdi
// Note that rax and rdx are also used for return values.
-//
+
VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
- const int i486_code_length = VtableStub::pd_code_size_limit(true);
- VtableStub* s = new(i486_code_length) VtableStub(true, vtable_index);
+ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
+ const int stub_code_length = code_size_limit(true);
+ VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index);
// Can be NULL if there is no free space in the code cache.
if (s == NULL) {
return NULL;
}
- ResourceMark rm;
- CodeBuffer cb(s->entry_point(), i486_code_length);
+ // Count unused bytes in instruction sequences of variable size.
+ // We add them to the computed buffer size in order to avoid
+ // overflow in subsequently generated stubs.
+ address start_pc;
+ int slop_bytes = 0;
+ int slop_delta = 0;
+ // No variance was detected in vtable stub sizes. Setting index_dependent_slop == 0 will unveil any deviation from this observation.
+ const int index_dependent_slop = 0;
+
+ ResourceMark rm;
+ CodeBuffer cb(s->entry_point(), stub_code_length);
MacroAssembler* masm = new MacroAssembler(&cb);
-#ifndef PRODUCT
-
+#if (!defined(PRODUCT) && defined(COMPILER2))
if (CountCompiledCalls) {
__ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
}
-#endif /* PRODUCT */
+#endif
// get receiver (need to skip return address on top of stack)
assert(VtableStub::receiver_location() == rcx->as_VMReg(), "receiver expected in rcx");
@@ -85,11 +94,21 @@
#ifndef PRODUCT
if (DebugVtables) {
Label L;
+ start_pc = __ pc();
// check offset vs vtable length
__ cmpl(Address(rax, Klass::vtable_length_offset()), vtable_index*vtableEntry::size());
+ slop_delta = 6 - (__ pc() - start_pc); // cmpl varies in length, depending on data
+ slop_bytes += slop_delta;
+ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
+
__ jcc(Assembler::greater, L);
__ movl(rbx, vtable_index);
+ // VTABLE TODO: find upper bound for call_VM length.
+ start_pc = __ pc();
__ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), rcx, rbx);
+ slop_delta = 480 - (__ pc() - start_pc);
+ slop_bytes += slop_delta;
+ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
__ bind(L);
}
#endif // PRODUCT
@@ -97,8 +116,13 @@
const Register method = rbx;
// load Method* and target address
+ start_pc = __ pc();
__ lookup_virtual_method(rax, vtable_index, method);
+ slop_delta = 6 - (int)(__ pc() - start_pc);
+ slop_bytes += slop_delta;
+ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
+#ifndef PRODUCT
if (DebugVtables) {
Label L;
__ cmpptr(method, (int32_t)NULL_WORD);
@@ -108,55 +132,53 @@
__ stop("Vtable entry is NULL");
__ bind(L);
}
+#endif // PRODUCT
- // rax,: receiver klass
+ // rax: receiver klass
// method (rbx): Method*
// rcx: receiver
address ame_addr = __ pc();
__ jmp( Address(method, Method::from_compiled_offset()));
masm->flush();
+ slop_bytes += index_dependent_slop; // add'l slop for size variance due to large vtable offsets
+ bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, index_dependent_slop);
- if (PrintMiscellaneous && (WizardMode || Verbose)) {
- tty->print_cr("vtable #%d at " PTR_FORMAT "[%d] left over: %d",
- vtable_index, p2i(s->entry_point()),
- (int)(s->code_end() - s->entry_point()),
- (int)(s->code_end() - __ pc()));
- }
- guarantee(__ pc() <= s->code_end(), "overflowed buffer");
- // shut the door on sizing bugs
- int slop = 3; // 32-bit offset is this much larger than an 8-bit one
- assert(vtable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset");
-
- s->set_exception_points(npe_addr, ame_addr);
return s;
}
VtableStub* VtableStubs::create_itable_stub(int itable_index) {
- // Note well: pd_code_size_limit is the absolute minimum we can get away with. If you
- // add code here, bump the code stub size returned by pd_code_size_limit!
- const int i486_code_length = VtableStub::pd_code_size_limit(false);
- VtableStub* s = new(i486_code_length) VtableStub(false, itable_index);
+ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
+ const int stub_code_length = code_size_limit(false);
+ VtableStub* s = new(stub_code_length) VtableStub(false, itable_index);
// Can be NULL if there is no free space in the code cache.
if (s == NULL) {
return NULL;
}
+ // Count unused bytes in instruction sequences of variable size.
+ // We add them to the computed buffer size in order to avoid
+ // overflow in subsequently generated stubs.
+ address start_pc;
+ int slop_bytes = 0;
+ int slop_delta = 0;
+ const int index_dependent_slop = (itable_index == 0) ? 4 : // code size change with transition from 8-bit to 32-bit constant (@index == 32).
+ (itable_index < 32) ? 3 : 0; // index == 0 generates even shorter code.
- ResourceMark rm;
- CodeBuffer cb(s->entry_point(), i486_code_length);
+ ResourceMark rm;
+ CodeBuffer cb(s->entry_point(), stub_code_length);
MacroAssembler* masm = new MacroAssembler(&cb);
+#if (!defined(PRODUCT) && defined(COMPILER2))
+ if (CountCompiledCalls) {
+ __ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
+ }
+#endif /* PRODUCT */
+
// Entry arguments:
// rax: CompiledICHolder
// rcx: Receiver
-#ifndef PRODUCT
- if (CountCompiledCalls) {
- __ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
- }
-#endif /* PRODUCT */
-
// Most registers are in use; we'll use rax, rbx, rsi, rdi
// (If we need to make rsi, rdi callee-save, do a push/pop here.)
const Register recv_klass_reg = rsi;
@@ -171,10 +193,12 @@
Label L_no_such_interface;
// get receiver klass (also an implicit null-check)
+ assert(VtableStub::receiver_location() == rcx->as_VMReg(), "receiver expected in rcx");
address npe_addr = __ pc();
- assert(VtableStub::receiver_location() == rcx->as_VMReg(), "receiver expected in rcx");
__ load_klass(recv_klass_reg, rcx);
+ start_pc = __ pc();
+
// Receiver subtype check against REFC.
// Destroys recv_klass_reg value.
__ lookup_interface_method(// inputs: rec. class, interface
@@ -184,6 +208,9 @@
L_no_such_interface,
/*return_method=*/false);
+ const ptrdiff_t typecheckSize = __ pc() - start_pc;
+ start_pc = __ pc();
+
// Get selected method from declaring class and itable index
const Register method = rbx;
__ load_klass(recv_klass_reg, rcx); // restore recv_klass_reg
@@ -193,19 +220,30 @@
method, temp_reg,
L_no_such_interface);
+ const ptrdiff_t lookupSize = __ pc() - start_pc;
+
+ // We expect to need index_dependent_slop extra bytes. Reason:
+ // The emitted code in lookup_interface_method changes when itable_index exceeds 31.
+ // For Windows, a narrow estimate was found to be 104. Other OSes were not tested.
+ const ptrdiff_t estimate = 104;
+ const ptrdiff_t codesize = typecheckSize + lookupSize + index_dependent_slop;
+ slop_delta = (int)(estimate - codesize);
+ slop_bytes += slop_delta;
+ assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize);
+
// method (rbx): Method*
// rcx: receiver
#ifdef ASSERT
if (DebugVtables) {
- Label L1;
- __ cmpptr(method, (int32_t)NULL_WORD);
- __ jcc(Assembler::equal, L1);
- __ cmpptr(Address(method, Method::from_compiled_offset()), (int32_t)NULL_WORD);
- __ jcc(Assembler::notZero, L1);
- __ stop("Method* is null");
- __ bind(L1);
- }
+ Label L1;
+ __ cmpptr(method, (int32_t)NULL_WORD);
+ __ jcc(Assembler::equal, L1);
+ __ cmpptr(Address(method, Method::from_compiled_offset()), (int32_t)NULL_WORD);
+ __ jcc(Assembler::notZero, L1);
+ __ stop("Method* is null");
+ __ bind(L1);
+ }
#endif // ASSERT
address ame_addr = __ pc();
@@ -219,70 +257,15 @@
// dirty work.
__ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
- __ flush();
+ masm->flush();
+ slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets
+ bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, index_dependent_slop);
- if (PrintMiscellaneous && (WizardMode || Verbose)) {
- tty->print_cr("itable #%d at " PTR_FORMAT "[%d] left over: %d",
- itable_index, p2i(s->entry_point()),
- (int)(s->code_end() - s->entry_point()),
- (int)(s->code_end() - __ pc()));
- }
- guarantee(__ pc() <= s->code_end(), "overflowed buffer");
- // shut the door on sizing bugs
- int slop = 3; // 32-bit offset is this much larger than an 8-bit one
- assert(itable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset");
-
- s->set_exception_points(npe_addr, ame_addr);
return s;
}
-
-
-int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
- if (is_vtable_stub) {
- // Vtable stub size
- return (DebugVtables ? 210 : 16) + (CountCompiledCalls ? 6 : 0);
- } else {
- // Itable stub size
- return (DebugVtables ? 256 : 110) + (CountCompiledCalls ? 6 : 0);
- }
- // In order to tune these parameters, run the JVM with VM options
- // +PrintMiscellaneous and +WizardMode to see information about
- // actual itable stubs. Look for lines like this:
- // itable #1 at 0x5551212[65] left over: 3
- // Reduce the constants so that the "left over" number is >=3
- // for the common cases.
- // Do not aim at a left-over number of zero, because a
- // large vtable or itable index (> 16) will require a 32-bit
- // immediate displacement instead of an 8-bit one.
- //
- // The JVM98 app. _202_jess has a megamorphic interface call.
- // The itable code looks like this:
- // Decoding VtableStub itbl[1]@1
- // mov 0x4(%ecx),%esi
- // mov 0xe8(%esi),%edi
- // lea 0x130(%esi,%edi,4),%edi
- // add $0x7,%edi
- // and $0xfffffff8,%edi
- // lea 0x4(%esi),%esi
- // mov (%edi),%ebx
- // cmp %ebx,%eax
- // je success
- // loop:
- // test %ebx,%ebx
- // je throw_icce
- // add $0x8,%edi
- // mov (%edi),%ebx
- // cmp %ebx,%eax
- // jne loop
- // success:
- // mov 0x4(%edi),%edi
- // mov (%esi,%edi,1),%ebx
- // jmp *0x44(%ebx)
- // throw_icce:
- // jmp throw_ICCE_entry
+int VtableStub::pd_code_alignment() {
+ // x86 cache line size is 64 bytes, but we want to limit alignment loss.
+ const unsigned int icache_line_size = wordSize;
+ return icache_line_size;
}
-
-int VtableStub::pd_code_alignment() {
- return wordSize;
-}
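
The per-sequence bookkeeping above always follows the same pattern: record the pc before a variable-length instruction sequence, subtract the bytes actually emitted from a fixed worst-case estimate, and accumulate the difference in slop_bytes. The standalone sketch below models only that arithmetic; the emit_*() helpers and their byte counts are invented for illustration and stand in for the MacroAssembler calls and pc deltas used in the stub generators.

    #include <cassert>
    #include <cstdio>

    // Hypothetical stand-ins for "emit a variable-length instruction sequence
    // and report how many bytes it took". The real code compares pc values.
    static int emit_vtable_length_check()   { return 7; }  // e.g. cmpl with a short immediate
    static int emit_lookup_virtual_method() { return 6; }  // load at a small vtable offset

    int main() {
      int slop_bytes = 0;  // unused bytes, later added to the measured stub size
      int slop_delta = 0;  // per-sequence difference: worst case - actual

      // Sequence 1: length check; worst case assumed to be 12 bytes.
      slop_delta  = 12 - emit_vtable_length_check();
      assert(slop_delta >= 0 && "negative slop encountered, adjust code size estimate!");
      slop_bytes += slop_delta;

      // Sequence 2: vtable lookup; worst case assumed to be 8 bytes.
      slop_delta  = 8 - emit_lookup_virtual_method();
      assert(slop_delta >= 0 && "negative slop encountered, adjust code size estimate!");
      slop_bytes += slop_delta;

      // The measured stub size plus slop_bytes becomes the buffer size
      // for all subsequently generated stubs of this kind.
      printf("accumulated slop: %d bytes\n", slop_bytes);
      return 0;
    }
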
--- a/src/hotspot/cpu/x86/vtableStubs_x86_64.cpp Sat Sep 01 12:02:07 2018 -0700
+++ b/src/hotspot/cpu/x86/vtableStubs_x86_64.cpp Mon Sep 03 09:43:08 2018 +0200
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -42,24 +42,32 @@
#define __ masm->
#ifndef PRODUCT
-extern "C" void bad_compiled_vtable_index(JavaThread* thread,
- oop receiver,
- int index);
+extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index);
#endif
VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
- const int amd64_code_length = VtableStub::pd_code_size_limit(true);
- VtableStub* s = new(amd64_code_length) VtableStub(true, vtable_index);
+ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
+ const int stub_code_length = code_size_limit(true);
+ VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index);
// Can be NULL if there is no free space in the code cache.
if (s == NULL) {
return NULL;
}
- ResourceMark rm;
- CodeBuffer cb(s->entry_point(), amd64_code_length);
+ // Count unused bytes in instruction sequences of variable size.
+ // We add them to the computed buffer size in order to avoid
+ // overflow in subsequently generated stubs.
+ address start_pc;
+ int slop_bytes = 0;
+ int slop_delta = 0;
+ // No variance was detected in vtable stub sizes. Setting index_dependent_slop == 0 will reveal any deviation from this observation.
+ const int index_dependent_slop = 0;
+
+ ResourceMark rm;
+ CodeBuffer cb(s->entry_point(), stub_code_length);
MacroAssembler* masm = new MacroAssembler(&cb);
-#ifndef PRODUCT
+#if (!defined(PRODUCT) && defined(COMPILER2))
if (CountCompiledCalls) {
__ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
}
@@ -77,22 +85,35 @@
#ifndef PRODUCT
if (DebugVtables) {
Label L;
+ start_pc = __ pc();
// check offset vs vtable length
- __ cmpl(Address(rax, Klass::vtable_length_offset()),
- vtable_index * vtableEntry::size());
+ __ cmpl(Address(rax, Klass::vtable_length_offset()), vtable_index*vtableEntry::size());
+ slop_delta = 12 - (__ pc() - start_pc); // cmpl varies in length, depending on data
+ slop_bytes += slop_delta;
+ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
+
__ jcc(Assembler::greater, L);
__ movl(rbx, vtable_index);
- __ call_VM(noreg,
- CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, rbx);
+ // VTABLE TODO: find upper bound for call_VM length.
+ start_pc = __ pc();
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, rbx);
+ slop_delta = 480 - (__ pc() - start_pc);
+ slop_bytes += slop_delta;
+ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
__ bind(L);
}
#endif // PRODUCT
- // load Method* and target address
const Register method = rbx;
+ // load Method* and target address
+ start_pc = __ pc();
__ lookup_virtual_method(rax, vtable_index, method);
+ slop_delta = 8 - (int)(__ pc() - start_pc);
+ slop_bytes += slop_delta;
+ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
+#ifndef PRODUCT
if (DebugVtables) {
Label L;
__ cmpptr(method, (int32_t)NULL_WORD);
@@ -102,50 +123,48 @@
__ stop("Vtable entry is NULL");
__ bind(L);
}
+#endif // PRODUCT
+
// rax: receiver klass
- // rbx: Method*
+ // method (rbx): Method*
// rcx: receiver
address ame_addr = __ pc();
__ jmp( Address(rbx, Method::from_compiled_offset()));
- __ flush();
+ masm->flush();
+ slop_bytes += index_dependent_slop; // add'l slop for size variance due to large vtable offsets
+ bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, index_dependent_slop);
- if (PrintMiscellaneous && (WizardMode || Verbose)) {
- tty->print_cr("vtable #%d at " PTR_FORMAT "[%d] left over: %d",
- vtable_index, p2i(s->entry_point()),
- (int)(s->code_end() - s->entry_point()),
- (int)(s->code_end() - __ pc()));
- }
- guarantee(__ pc() <= s->code_end(), "overflowed buffer");
- // shut the door on sizing bugs
- int slop = 3; // 32-bit offset is this much larger than an 8-bit one
- assert(vtable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset");
-
- s->set_exception_points(npe_addr, ame_addr);
return s;
}
VtableStub* VtableStubs::create_itable_stub(int itable_index) {
- // Note well: pd_code_size_limit is the absolute minimum we can get
- // away with. If you add code here, bump the code stub size
- // returned by pd_code_size_limit!
- const int amd64_code_length = VtableStub::pd_code_size_limit(false);
- VtableStub* s = new(amd64_code_length) VtableStub(false, itable_index);
+ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
+ const int stub_code_length = code_size_limit(false);
+ VtableStub* s = new(stub_code_length) VtableStub(false, itable_index);
// Can be NULL if there is no free space in the code cache.
if (s == NULL) {
return NULL;
}
+ // Count unused bytes in instruction sequences of variable size.
+ // We add them to the computed buffer size in order to avoid
+ // overflow in subsequently generated stubs.
+ address start_pc;
+ int slop_bytes = 0;
+ int slop_delta = 0;
+ const int index_dependent_slop = (itable_index == 0) ? 4 : // index == 0 generates even shorter code.
+ (itable_index < 16) ? 3 : 0; // code size changes with the transition from 8-bit to 32-bit displacement (@index == 16).
- ResourceMark rm;
- CodeBuffer cb(s->entry_point(), amd64_code_length);
- MacroAssembler* masm = new MacroAssembler(&cb);
+ ResourceMark rm;
+ CodeBuffer cb(s->entry_point(), stub_code_length);
+ MacroAssembler* masm = new MacroAssembler(&cb);
-#ifndef PRODUCT
+#if (!defined(PRODUCT) && defined(COMPILER2))
if (CountCompiledCalls) {
__ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
}
-#endif
+#endif // PRODUCT
// Entry arguments:
// rax: CompiledICHolder
@@ -158,17 +177,19 @@
const Register resolved_klass_reg = rbx; // resolved interface klass (REFC)
const Register temp_reg = r11;
- Label L_no_such_interface;
-
const Register icholder_reg = rax;
__ movptr(resolved_klass_reg, Address(icholder_reg, CompiledICHolder::holder_klass_offset()));
__ movptr(holder_klass_reg, Address(icholder_reg, CompiledICHolder::holder_metadata_offset()));
+ Label L_no_such_interface;
+
// get receiver klass (also an implicit null-check)
assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0");
address npe_addr = __ pc();
__ load_klass(recv_klass_reg, j_rarg0);
+ start_pc = __ pc();
+
// Receiver subtype check against REFC.
// Destroys recv_klass_reg value.
__ lookup_interface_method(// inputs: rec. class, interface
@@ -178,6 +199,9 @@
L_no_such_interface,
/*return_method=*/false);
+ const ptrdiff_t typecheckSize = __ pc() - start_pc;
+ start_pc = __ pc();
+
// Get selected method from declaring class and itable index
const Register method = rbx;
__ load_klass(recv_klass_reg, j_rarg0); // restore recv_klass_reg
@@ -187,6 +211,17 @@
method, temp_reg,
L_no_such_interface);
+ const ptrdiff_t lookupSize = __ pc() - start_pc;
+
+ // We expect to need index_dependent_slop extra bytes. Reason:
+ // The emitted code in lookup_interface_method changes when itable_index exceeds 15.
+ // For Linux, a very narrow estimate would be 112, but Solaris requires some more space (130).
+ const ptrdiff_t estimate = 136;
+ const ptrdiff_t codesize = typecheckSize + lookupSize + index_dependent_slop;
+ slop_delta = (int)(estimate - codesize);
+ slop_bytes += slop_delta;
+ assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize);
+
// If we take a trap while this arg is on the stack we will not
// be able to walk the stack properly. This is not an issue except
// when there are mistakes in this assembly code that could generate
@@ -207,8 +242,6 @@
}
#endif // ASSERT
- // rbx: Method*
- // j_rarg0: receiver
address ame_addr = __ pc();
__ jmp(Address(method, Method::from_compiled_offset()));
@@ -220,68 +253,15 @@
// dirty work.
__ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
- __ flush();
+ masm->flush();
+ slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets
+ bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, index_dependent_slop);
- if (PrintMiscellaneous && (WizardMode || Verbose)) {
- tty->print_cr("itable #%d at " PTR_FORMAT "[%d] left over: %d",
- itable_index, p2i(s->entry_point()),
- (int)(s->code_end() - s->entry_point()),
- (int)(s->code_end() - __ pc()));
- }
- guarantee(__ pc() <= s->code_end(), "overflowed buffer");
- // shut the door on sizing bugs
- int slop = 3; // 32-bit offset is this much larger than an 8-bit one
- assert(itable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset");
-
- s->set_exception_points(npe_addr, ame_addr);
return s;
}
-int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
- if (is_vtable_stub) {
- // Vtable stub size
- return (DebugVtables ? 512 : 24) + (CountCompiledCalls ? 13 : 0) +
- (UseCompressedClassPointers ? MacroAssembler::instr_size_for_decode_klass_not_null() : 0);
- } else {
- // Itable stub size
- return (DebugVtables ? 512 : 140) + (CountCompiledCalls ? 13 : 0) +
- (UseCompressedClassPointers ? 2 * MacroAssembler::instr_size_for_decode_klass_not_null() : 0);
- }
- // In order to tune these parameters, run the JVM with VM options
- // +PrintMiscellaneous and +WizardMode to see information about
- // actual itable stubs. Look for lines like this:
- // itable #1 at 0x5551212[71] left over: 3
- // Reduce the constants so that the "left over" number is >=3
- // for the common cases.
- // Do not aim at a left-over number of zero, because a
- // large vtable or itable index (>= 32) will require a 32-bit
- // immediate displacement instead of an 8-bit one.
- //
- // The JVM98 app. _202_jess has a megamorphic interface call.
- // The itable code looks like this:
- // Decoding VtableStub itbl[1]@12
- // mov 0x8(%rsi),%r10
- // mov 0x198(%r10),%r11d
- // lea 0x218(%r10,%r11,8),%r11
- // lea 0x8(%r10),%r10
- // mov (%r11),%rbx
- // cmp %rbx,%rax
- // je success
- // loop:
- // test %rbx,%rbx
- // je throw_icce
- // add $0x10,%r11
- // mov (%r11),%rbx
- // cmp %rbx,%rax
- // jne loop
- // success:
- // mov 0x8(%r11),%r11d
- // mov (%r10,%r11,1),%rbx
- // jmpq *0x60(%rbx)
- // throw_icce:
- // jmpq throw_ICCE_entry
+int VtableStub::pd_code_alignment() {
+ // x86 cache line size is 64 bytes, but we want to limit alignment loss.
+ const unsigned int icache_line_size = wordSize;
+ return icache_line_size;
}
-
-int VtableStub::pd_code_alignment() {
- return wordSize;
-}
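
The index_dependent_slop values above reflect x86 operand encoding: a memory displacement in [-128, 127] is encoded in one byte, anything larger needs four, so a stub generated for a small itable_index comes out a few bytes shorter than one generated for a large index, and that difference has to be folded into the remembered size estimate. The sketch below illustrates the reasoning; the 8-bytes-per-index displacement growth is an illustrative assumption rather than the real itable layout, and index_dependent_slop() simply mirrors the table in the code above.

    #include <cstdio>

    // Bytes of displacement needed for an x86 memory operand.
    static int disp_bytes(int displacement) {
      return (displacement >= -128 && displacement <= 127) ? 1 : 4;
    }

    // Extra bytes a later stub might need compared to a stub generated now
    // for 'itable_index' (mirrors the index_dependent_slop table above).
    static int index_dependent_slop(int itable_index) {
      return (itable_index == 0) ? 4 :    // index == 0 generates even shorter code
             (itable_index < 16) ? 3 : 0; // disp8 -> disp32 transition around index 16
    }

    int main() {
      // Assume the displacement grows by 8 bytes per index (illustration only).
      const int indexes[] = { 0, 1, 15, 16, 64 };
      for (int index : indexes) {
        printf("itable_index %3d: displacement bytes %d, slop to add %d\n",
               index, disp_bytes(index * 8), index_dependent_slop(index));
      }
      return 0;
    }
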
--- a/src/hotspot/share/code/vtableStubs.cpp Sat Sep 01 12:02:07 2018 -0700
+++ b/src/hotspot/share/code/vtableStubs.cpp Mon Sep 03 09:43:08 2018 +0200
@@ -92,6 +92,32 @@
VtableStub* VtableStubs::_table[VtableStubs::N];
int VtableStubs::_number_of_vtable_stubs = 0;
+int VtableStubs::_vtab_stub_size = 0;
+int VtableStubs::_itab_stub_size = 0;
+
+#if defined(PRODUCT)
+ // These values are good for the PRODUCT case (no tracing).
+ static const int first_vtableStub_size = 64;
+ static const int first_itableStub_size = 256;
+#else
+ // These values are good for the non-PRODUCT case (when tracing can be switched on).
+ // To determine suitable values, run a test workload with
+ // -Xlog:vtablestubs=Trace -XX:+CountCompiledCalls -XX:+DebugVtables
+ // and use the reported "estimate" value.
+ // Here is a list of observed worst-case values:
+ // vtable itable
+ // aarch64: 460 324
+ // arm: ? ?
+ // ppc (linux, BE): 404 288
+ // ppc (linux, LE): 356 276
+ // ppc (AIX): 416 296
+ // s390x: 408 256
+ // Solaris-sparc: 792 348
+ // x86 (Linux): 670 309
+ // x86 (MacOS): 682 321
+ static const int first_vtableStub_size = 1024;
+ static const int first_itableStub_size = 512;
+#endif
void VtableStubs::initialize() {
@@ -107,6 +133,77 @@
}
+int VtableStubs::code_size_limit(bool is_vtable_stub) {
+ if (is_vtable_stub) {
+ return _vtab_stub_size > 0 ? _vtab_stub_size : first_vtableStub_size;
+ } else { // itable stub
+ return _itab_stub_size > 0 ? _itab_stub_size : first_itableStub_size;
+ }
+} // code_size_limit
+
+
+void VtableStubs::check_and_set_size_limit(bool is_vtable_stub,
+ int code_size,
+ int padding) {
+ const char* name = is_vtable_stub ? "vtable" : "itable";
+
+ guarantee(code_size <= code_size_limit(is_vtable_stub),
+ "buffer overflow in %s stub, code_size is %d, limit is %d", name, code_size, code_size_limit(is_vtable_stub));
+
+ if (is_vtable_stub) {
+ if (log_is_enabled(Trace, vtablestubs)) {
+ if ( (_vtab_stub_size > 0) && ((code_size + padding) > _vtab_stub_size) ) {
+ log_trace(vtablestubs)("%s size estimate needed adjustment from %d to %d bytes",
+ name, _vtab_stub_size, code_size + padding);
+ }
+ }
+ if ( (code_size + padding) > _vtab_stub_size ) {
+ _vtab_stub_size = code_size + padding;
+ }
+ } else { // itable stub
+ if (log_is_enabled(Trace, vtablestubs)) {
+ if ( (_itab_stub_size > 0) && ((code_size + padding) > _itab_stub_size) ) {
+ log_trace(vtablestubs)("%s size estimate needed adjustment from %d to %d bytes",
+ name, _itab_stub_size, code_size + padding);
+ }
+ }
+ if ( (code_size + padding) > _itab_stub_size ) {
+ _itab_stub_size = code_size + padding;
+ }
+ }
+ return;
+} // check_and_set_size_limit
+
+
+void VtableStubs::bookkeeping(MacroAssembler* masm, outputStream* out, VtableStub* s,
+ address npe_addr, address ame_addr, bool is_vtable_stub,
+ int index, int slop_bytes, int index_dependent_slop) {
+ const char* name = is_vtable_stub ? "vtable" : "itable";
+ const int stub_length = code_size_limit(is_vtable_stub);
+
+ if (log_is_enabled(Trace, vtablestubs)) {
+ log_trace(vtablestubs)("%s #%d at " PTR_FORMAT ": size: %d, estimate: %d, slop area: %d",
+ name, index, p2i(s->code_begin()),
+ (int)(masm->pc() - s->code_begin()),
+ stub_length,
+ (int)(s->code_end() - masm->pc()));
+ }
+ guarantee(masm->pc() <= s->code_end(), "%s #%d: overflowed buffer, estimated len: %d, actual len: %d, overrun: %d",
+ name, index, stub_length,
+ (int)(masm->pc() - s->code_begin()),
+ (int)(masm->pc() - s->code_end()));
+ assert((masm->pc() + index_dependent_slop) <= s->code_end(), "%s #%d: spare space for 32-bit offset: required = %d, available = %d",
+ name, index, index_dependent_slop,
+ (int)(s->code_end() - masm->pc()));
+
+ // After the first vtable/itable stub is generated, we have a much
+ // better estimate for the stub size. Remember/update this
+ // estimate after some sanity checks.
+ check_and_set_size_limit(is_vtable_stub, masm->offset(), slop_bytes);
+ s->set_exception_points(npe_addr, ame_addr);
+}
+
+
address VtableStubs::find_stub(bool is_vtable_stub, int vtable_index) {
assert(vtable_index >= 0, "must be positive");
@@ -173,10 +270,7 @@
uint hash = VtableStubs::hash(stub->is_vtable_stub(), stub->index());
VtableStub* s;
for (s = _table[hash]; s != NULL && s != stub; s = s->next()) {}
- if (s == stub) {
- return s;
- }
- return NULL;
+ return (s == stub) ? s : NULL;
}
bool VtableStubs::contains(address pc) {
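
Taken together, code_size_limit() and check_and_set_size_limit() implement a simple policy: hand out a generous compile-time constant until the first stub of a kind has been generated, then remember "measured size + padding" and only ever raise that value. A minimal standalone model of that policy, with invented names and plain ints in place of the code cache:

    #include <algorithm>
    #include <cstdio>

    static const int first_stub_size = 1024;  // generous guess used before any stub exists
    static int       current_limit   = 0;     // 0 means "no stub generated yet"

    // What create_*_stub would request when allocating the next buffer.
    static int size_limit() {
      return current_limit > 0 ? current_limit : first_stub_size;
    }

    // What the bookkeeping would do after a stub of 'code_size' bytes was emitted,
    // with 'padding' covering data-dependent variance (the accumulated slop bytes).
    static void update_limit(int code_size, int padding) {
      current_limit = std::max(current_limit, code_size + padding);
    }

    int main() {
      printf("limit before any stub:   %d\n", size_limit());  // 1024
      update_limit(212, 36);                                   // first stub measured
      printf("limit after first stub:  %d\n", size_limit());  // 248
      update_limit(230, 36);                                   // a slightly larger one
      printf("limit after second stub: %d\n", size_limit());  // 266
      return 0;
    }
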
--- a/src/hotspot/share/code/vtableStubs.hpp Sat Sep 01 12:02:07 2018 -0700
+++ b/src/hotspot/share/code/vtableStubs.hpp Mon Sep 03 09:43:08 2018 +0200
@@ -25,12 +25,94 @@
#ifndef SHARE_VM_CODE_VTABLESTUBS_HPP
#define SHARE_VM_CODE_VTABLESTUBS_HPP
+#include "asm/macroAssembler.hpp"
#include "code/vmreg.hpp"
#include "memory/allocation.hpp"
// A VtableStub holds an individual code stub for a pair (vtable index, #args) for either itables or vtables
// There's a one-to-one relationship between a VtableStub and such a pair.
+// A word on VtableStub sizing:
+// Such a vtable/itable stub consists of the instance data
+// and an immediately following CodeBuffer.
+// Unfortunately, the required space for the code buffer varies, depending on
+// the setting of compile time macros (PRODUCT, ASSERT, ...) and of command line
+// parameters. Actual data may have an influence on the size as well.
+//
+// A simple approximation for the VtableStub size would be to just take a value
+// "large enough" for all circumstances - a worst case estimate.
+// As there can exist many stubs - and they never go away - we certainly don't
+// want to waste more code cache space than absolutely necessary.
+//
+// We need a different approach which, as far as possible, should be independent
+// of or adaptive to code size variations. These variations may be caused by
+// changed compile-time or run-time switches as well as by changed emitter code.
+//
+// Here is the idea:
+// For the first stub we generate, we allocate a "large enough" code buffer.
+// Once all instructions are emitted, we know the actual size of the stub.
+// Remembering that size allows us to allocate a tightly matching code buffer
+// for all subsequent stubs. That covers all "static variance", i.e. all variance
+// that is due to compile time macros, command line parameters, machine capabilities,
+// and other influences which are immutable for the life span of the vm.
+//
+// Life isn't always that easy. Code size may depend on actual data, "load constant"
+// being an example of that. All code segments with such "dynamic variance" require
+// additional care. We need to know or estimate the worst case code size for each
+// such segment. With that knowledge, we can maintain a "slop counter" in the
+// platform-specific stub emitters. It accumulates the difference between worst-case
+// and actual code size. When the stub is fully generated, the actual stub size is
+// adjusted (increased) by the slop counter value.
+//
+// As a result, we allocate all but the first code buffers with the same, tightly matching size.
+//
+
+// VtableStubs creates the code stubs for compiled calls through vtables.
+// There is one stub per (vtable index, args_size) pair, and the stubs are
+// never deallocated. They don't need to be GCed because they contain no oops.
+class VtableStub;
+
+class VtableStubs : AllStatic {
+ public: // N must be public (some compilers need this for _table)
+ enum {
+ N = 256, // size of stub table; must be power of two
+ mask = N - 1
+ };
+
+ private:
+ friend class VtableStub;
+ static VtableStub* _table[N]; // table of existing stubs
+ static int _number_of_vtable_stubs; // number of stubs created so far (for statistics)
+ static int _vtab_stub_size; // current size estimate for vtable stub (quasi-constant)
+ static int _itab_stub_size; // current size estimate for itable stub (quasi-constant)
+
+ static VtableStub* create_vtable_stub(int vtable_index);
+ static VtableStub* create_itable_stub(int vtable_index);
+ static VtableStub* lookup (bool is_vtable_stub, int vtable_index);
+ static void enter (bool is_vtable_stub, int vtable_index, VtableStub* s);
+ static inline uint hash (bool is_vtable_stub, int vtable_index);
+ static address find_stub (bool is_vtable_stub, int vtable_index);
+ static void bookkeeping(MacroAssembler* masm, outputStream* out, VtableStub* s,
+ address npe_addr, address ame_addr, bool is_vtable_stub,
+ int index, int slop_bytes, int index_dependent_slop);
+ static int code_size_limit(bool is_vtable_stub);
+ static void check_and_set_size_limit(bool is_vtable_stub,
+ int code_size,
+ int padding);
+
+ public:
+ static address find_vtable_stub(int vtable_index) { return find_stub(true, vtable_index); }
+ static address find_itable_stub(int itable_index) { return find_stub(false, itable_index); }
+
+ static VtableStub* entry_point(address pc); // vtable stub entry point for a pc
+ static bool contains(address pc); // is pc within any stub?
+ static VtableStub* stub_containing(address pc); // stub containing pc or NULL
+ static int number_of_vtable_stubs() { return _number_of_vtable_stubs; }
+ static void initialize();
+ static void vtable_stub_do(void f(VtableStub*)); // iterates over all vtable stubs
+};
+
+
class VtableStub {
private:
friend class VtableStubs;
@@ -58,7 +140,7 @@
public:
address code_begin() const { return (address)(this + 1); }
- address code_end() const { return code_begin() + pd_code_size_limit(_is_vtable_stub); }
+ address code_end() const { return code_begin() + VtableStubs::code_size_limit(_is_vtable_stub); }
address entry_point() const { return code_begin(); }
static int entry_offset() { return sizeof(class VtableStub); }
@@ -78,7 +160,6 @@
}
// platform-dependent routines
- static int pd_code_size_limit(bool is_vtable_stub);
static int pd_code_alignment();
// CNC: Removed because vtable stubs are now made with an ideal graph
// static bool pd_disregard_arg_size();
@@ -100,38 +181,4 @@
};
-
-// VtableStubs creates the code stubs for compiled calls through vtables.
-// There is one stub per (vtable index, args_size) pair, and the stubs are
-// never deallocated. They don't need to be GCed because they contain no oops.
-
-class VtableStubs : AllStatic {
- public: // N must be public (some compilers need this for _table)
- enum {
- N = 256, // size of stub table; must be power of two
- mask = N - 1
- };
-
- private:
- static VtableStub* _table[N]; // table of existing stubs
- static int _number_of_vtable_stubs; // number of stubs created so far (for statistics)
-
- static VtableStub* create_vtable_stub(int vtable_index);
- static VtableStub* create_itable_stub(int vtable_index);
- static VtableStub* lookup (bool is_vtable_stub, int vtable_index);
- static void enter (bool is_vtable_stub, int vtable_index, VtableStub* s);
- static inline uint hash (bool is_vtable_stub, int vtable_index);
- static address find_stub (bool is_vtable_stub, int vtable_index);
-
- public:
- static address find_vtable_stub(int vtable_index) { return find_stub(true, vtable_index); }
- static address find_itable_stub(int itable_index) { return find_stub(false, itable_index); }
- static VtableStub* entry_point(address pc); // vtable stub entry point for a pc
- static bool contains(address pc); // is pc within any stub?
- static VtableStub* stub_containing(address pc); // stub containing pc or NULL
- static int number_of_vtable_stubs() { return _number_of_vtable_stubs; }
- static void initialize();
- static void vtable_stub_do(void f(VtableStub*)); // iterates over all vtable stubs
-};
-
#endif // SHARE_VM_CODE_VTABLESTUBS_HPP
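
The sizing comment above relies on the stub layout: a VtableStub's instance data is followed immediately by its code, which is why code_begin() is simply (address)(this + 1) and why the placement operator new for a stub must reserve header plus code buffer in a single allocation. A self-contained sketch of that layout, using malloc and a trivial header struct in place of the code cache and VtableStub:

    #include <cstdio>
    #include <cstdlib>
    #include <new>

    struct StubHeader {
      int index;      // stand-in for the vtable/itable index
      int code_size;  // bytes reserved for code right after this header

      unsigned char* code_begin() { return reinterpret_cast<unsigned char*>(this + 1); }
      unsigned char* code_end()   { return code_begin() + code_size; }
    };

    int main() {
      const int code_bytes = 64;

      // One allocation holds the header and the code area that follows it.
      void* raw = std::malloc(sizeof(StubHeader) + code_bytes);
      if (raw == NULL) return 1;
      StubHeader* stub = new (raw) StubHeader{5, code_bytes};

      // "Emit" something into the code area.
      stub->code_begin()[0] = 0xC3;  // x86 'ret', purely illustrative

      printf("header at %p, code at %p, %d code bytes\n",
             static_cast<void*>(stub), static_cast<void*>(stub->code_begin()),
             static_cast<int>(stub->code_end() - stub->code_begin()));

      std::free(raw);
      return 0;
    }
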
--- a/src/hotspot/share/logging/logTag.hpp Sat Sep 01 12:02:07 2018 -0700
+++ b/src/hotspot/share/logging/logTag.hpp Mon Sep 03 09:43:08 2018 +0200
@@ -170,7 +170,8 @@
LOG_TAG(vmoperation) \
LOG_TAG(vmthread) \
LOG_TAG(vtables) \
- LOG_TAG(workgang)
+ LOG_TAG(vtablestubs) \
+ LOG_TAG(workgang) \
LOG_TAG_LIST_EXT
#define PREFIX_LOG_TAG(T) (LogTag::_##T)