8207343: Automate vtable/itable stub size calculation
author lucy
Mon, 03 Sep 2018 09:43:08 +0200
changeset 51618 54b344d9dd4e
parent 51617 9720ad0a40b6
child 51619 dca697c71e5d
8207343: Automate vtable/itable stub size calculation
Reviewed-by: kvn, mdoerr
src/hotspot/cpu/aarch64/vtableStubs_aarch64.cpp
src/hotspot/cpu/arm/vtableStubs_arm.cpp
src/hotspot/cpu/ppc/vtableStubs_ppc_64.cpp
src/hotspot/cpu/s390/vtableStubs_s390.cpp
src/hotspot/cpu/sparc/vtableStubs_sparc.cpp
src/hotspot/cpu/x86/vtableStubs_x86_32.cpp
src/hotspot/cpu/x86/vtableStubs_x86_64.cpp
src/hotspot/share/code/vtableStubs.cpp
src/hotspot/share/code/vtableStubs.hpp
src/hotspot/share/logging/logTag.hpp
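
All platform files below follow the same sizing pattern: emit each variable-length instruction sequence, measure it against a worst-case estimate, accumulate the surplus as "slop", and hand the total to the shared bookkeeping() helper declared in share/code/vtableStubs.hpp. The following standalone C++ sketch is not part of the patch; the estimate/size pairs are hypothetical, loosely based on the aarch64 hunk below, and only illustrate that accounting:

#include <cassert>
#include <cstdio>

int main() {
  int slop_bytes = 0;

  // {worst-case estimate, bytes actually emitted} per variable-size sequence;
  // hypothetical numbers, loosely following the aarch64 estimates (256, 8, 152).
  const int sequences[][2] = { {256, 212}, {8, 4}, {152, 148} };

  for (const auto& seq : sequences) {
    const int estimate   = seq[0];
    const int codesize   = seq[1];
    const int slop_delta = estimate - codesize;   // unused bytes in this sequence
    assert(slop_delta >= 0 && "code size estimate too small, adjust it!");
    slop_bytes += slop_delta;                     // reported to the shared sizing code
  }

  // The accumulated slop is what the stubs pass to bookkeeping(), so the shared
  // code can check for buffer overflow and refine future size estimates.
  printf("total slop: %d bytes\n", slop_bytes);
  return 0;
}
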
--- a/src/hotspot/cpu/aarch64/vtableStubs_aarch64.cpp	Sat Sep 01 12:02:07 2018 -0700
+++ b/src/hotspot/cpu/aarch64/vtableStubs_aarch64.cpp	Mon Sep 03 09:43:08 2018 +0200
@@ -44,24 +44,30 @@
 #define __ masm->
 
 #ifndef PRODUCT
-extern "C" void bad_compiled_vtable_index(JavaThread* thread,
-                                          oop receiver,
-                                          int index);
+extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index);
 #endif
 
 VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
-  const int aarch64_code_length = VtableStub::pd_code_size_limit(true);
-  VtableStub* s = new(aarch64_code_length) VtableStub(true, vtable_index);
+  // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
+  const int stub_code_length = code_size_limit(true);
+  VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index);
   // Can be NULL if there is no free space in the code cache.
   if (s == NULL) {
     return NULL;
   }
 
-  ResourceMark rm;
-  CodeBuffer cb(s->entry_point(), aarch64_code_length);
+  // Count unused bytes in instruction sequences of variable size.
+  // We add them to the computed buffer size in order to avoid
+  // overflow in subsequently generated stubs.
+  address   start_pc;
+  int       slop_bytes = 0;
+  int       slop_delta = 0;
+
+  ResourceMark    rm;
+  CodeBuffer      cb(s->entry_point(), stub_code_length);
   MacroAssembler* masm = new MacroAssembler(&cb);
 
-#ifndef PRODUCT
+#if (!defined(PRODUCT) && defined(COMPILER2))
   if (CountCompiledCalls) {
     __ lea(r16, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
     __ incrementw(Address(r16));
@@ -78,21 +84,35 @@
 #ifndef PRODUCT
   if (DebugVtables) {
     Label L;
+    // TODO: find upper bound for this debug code.
+    start_pc = __ pc();
+
     // check offset vs vtable length
     __ ldrw(rscratch1, Address(r16, Klass::vtable_length_offset()));
     __ cmpw(rscratch1, vtable_index * vtableEntry::size());
     __ br(Assembler::GT, L);
     __ enter();
     __ mov(r2, vtable_index);
-    __ call_VM(noreg,
-               CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, r2);
+
+    __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, r2);
+    const ptrdiff_t estimate = 256;
+    const ptrdiff_t codesize = __ pc() - start_pc;
+    slop_delta  = estimate - codesize;  // call_VM varies in length, depending on data
+    slop_bytes += slop_delta;
+    assert(slop_delta >= 0, "vtable #%d: Code size estimate (%d) for DebugVtables too small, required: %d", vtable_index, (int)estimate, (int)codesize);
+
     __ leave();
     __ bind(L);
   }
 #endif // PRODUCT
 
+  start_pc = __ pc();
   __ lookup_virtual_method(r16, vtable_index, rmethod);
+  slop_delta  = 8 - (int)(__ pc() - start_pc);
+  slop_bytes += slop_delta;
+  assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
 
+#ifndef PRODUCT
   if (DebugVtables) {
     Label L;
     __ cbz(rmethod, L);
@@ -101,6 +121,8 @@
     __ stop("Vtable entry is NULL");
     __ bind(L);
   }
+#endif // PRODUCT
+
   // r0: receiver klass
   // rmethod: Method*
   // r2: receiver
@@ -108,43 +130,46 @@
   __ ldr(rscratch1, Address(rmethod, Method::from_compiled_offset()));
   __ br(rscratch1);
 
-  __ flush();
+  masm->flush();
+  bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, 0);
 
-  if (PrintMiscellaneous && (WizardMode || Verbose)) {
-    tty->print_cr("vtable #%d at " PTR_FORMAT "[%d] left over: %d",
-                  vtable_index, p2i(s->entry_point()),
-                  (int)(s->code_end() - s->entry_point()),
-                  (int)(s->code_end() - __ pc()));
-  }
-  guarantee(__ pc() <= s->code_end(), "overflowed buffer");
-
-  s->set_exception_points(npe_addr, ame_addr);
   return s;
 }
 
 
 VtableStub* VtableStubs::create_itable_stub(int itable_index) {
-  // Note well: pd_code_size_limit is the absolute minimum we can get
-  // away with.  If you add code here, bump the code stub size
-  // returned by pd_code_size_limit!
-  const int code_length = VtableStub::pd_code_size_limit(false);
-  VtableStub* s = new(code_length) VtableStub(false, itable_index);
-  ResourceMark rm;
-  CodeBuffer cb(s->entry_point(), code_length);
+  // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
+  const int stub_code_length = code_size_limit(false);
+  VtableStub* s = new(stub_code_length) VtableStub(false, itable_index);
+  // Can be NULL if there is no free space in the code cache.
+  if (s == NULL) {
+    return NULL;
+  }
+  // Count unused bytes in instruction sequences of variable size.
+  // We add them to the computed buffer size in order to avoid
+  // overflow in subsequently generated stubs.
+  address   start_pc;
+  int       slop_bytes = 0;
+  int       slop_delta = 0;
+
+  ResourceMark    rm;
+  CodeBuffer      cb(s->entry_point(), stub_code_length);
   MacroAssembler* masm = new MacroAssembler(&cb);
 
-#ifndef PRODUCT
+#if (!defined(PRODUCT) && defined(COMPILER2))
   if (CountCompiledCalls) {
     __ lea(r10, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
     __ incrementw(Address(r10));
   }
 #endif
 
+  // get receiver (need to skip return address on top of stack)
+  assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0");
+
   // Entry arguments:
   //  rscratch2: CompiledICHolder
   //  j_rarg0: Receiver
 
-
   // Most registers are in use; we'll use r16, rmethod, r10, r11
   const Register recv_klass_reg     = r10;
   const Register holder_klass_reg   = r16; // declaring interface klass (DECC)
@@ -157,8 +182,8 @@
   __ ldr(resolved_klass_reg, Address(icholder_reg, CompiledICHolder::holder_klass_offset()));
   __ ldr(holder_klass_reg,   Address(icholder_reg, CompiledICHolder::holder_metadata_offset()));
 
-  // get receiver (need to skip return address on top of stack)
-  assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0");
+  start_pc = __ pc();
+
   // get receiver klass (also an implicit null-check)
   address npe_addr = __ pc();
   __ load_klass(recv_klass_reg, j_rarg0);
@@ -172,16 +197,25 @@
                              L_no_such_interface,
                              /*return_method=*/false);
 
+  const ptrdiff_t  typecheckSize = __ pc() - start_pc;
+  start_pc = __ pc();
+
   // Get selected method from declaring class and itable index
   __ load_klass(recv_klass_reg, j_rarg0);   // restore recv_klass_reg
   __ lookup_interface_method(// inputs: rec. class, interface, itable index
-                       recv_klass_reg, holder_klass_reg, itable_index,
-                       // outputs: method, scan temp. reg
-                       rmethod, temp_reg,
-                       L_no_such_interface);
+                             recv_klass_reg, holder_klass_reg, itable_index,
+                             // outputs: method, scan temp. reg
+                             rmethod, temp_reg,
+                             L_no_such_interface);
+
+  const ptrdiff_t lookupSize = __ pc() - start_pc;
 
-  // method (rmethod): Method*
-  // j_rarg0: receiver
+  // Reduce "estimate" such that "padding" does not drop below 8.
+  const ptrdiff_t estimate = 152;
+  const ptrdiff_t codesize = typecheckSize + lookupSize;
+  slop_delta  = (int)(estimate - codesize);
+  slop_bytes += slop_delta;
+  assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize);
 
 #ifdef ASSERT
   if (DebugVtables) {
@@ -206,92 +240,17 @@
   // We force resolving of the call site by jumping to the "handle
   // wrong method" stub, and so let the interpreter runtime do all the
   // dirty work.
+  assert(SharedRuntime::get_handle_wrong_method_stub() != NULL, "check initialization order");
   __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
 
-  __ flush();
+  masm->flush();
+  bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0);
 
-  if (PrintMiscellaneous && (WizardMode || Verbose)) {
-    tty->print_cr("itable #%d at " PTR_FORMAT "[%d] left over: %d",
-                  itable_index, p2i(s->entry_point()),
-                  (int)(s->code_end() - s->entry_point()),
-                  (int)(s->code_end() - __ pc()));
-  }
-  guarantee(__ pc() <= s->code_end(), "overflowed buffer");
-
-  s->set_exception_points(npe_addr, ame_addr);
   return s;
 }
 
-
-int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
-  int size = DebugVtables ? 216 : 0;
-  if (CountCompiledCalls)
-    size += 6 * 4;
-  // FIXME: vtable stubs only need 36 bytes
-  if (is_vtable_stub)
-    size += 52;
-  else
-    size += 176;
-  return size;
-
-  // In order to tune these parameters, run the JVM with VM options
-  // +PrintMiscellaneous and +WizardMode to see information about
-  // actual itable stubs.  Run it with -Xmx31G -XX:+UseCompressedOops.
-  //
-  // If Universe::narrow_klass_base is nonzero, decoding a compressed
-  // class can take zeveral instructions.
-  //
-  // The JVM98 app. _202_jess has a megamorphic interface call.
-  // The itable code looks like this:
-
-  //    ldr    xmethod, [xscratch2,#CompiledICHolder::holder_klass_offset]
-  //    ldr    x0, [xscratch2]
-  //    ldr    w10, [x1,#oopDesc::klass_offset_in_bytes]
-  //    mov    xheapbase, #0x3c000000                //   #narrow_klass_base
-  //    movk    xheapbase, #0x3f7, lsl #32
-  //    add    x10, xheapbase, x10
-  //    mov    xheapbase, #0xe7ff0000                //   #heapbase
-  //    movk    xheapbase, #0x3f7, lsl #32
-  //    ldr    w11, [x10,#vtable_length_offset]
-  //    add    x11, x10, x11, uxtx #3
-  //    add    x11, x11, #itableMethodEntry::method_offset_in_bytes
-  //    ldr    x10, [x11]
-  //    cmp    xmethod, x10
-  //    b.eq    found_method
-  // search:
-  //    cbz    x10, no_such_interface
-  //    add    x11, x11, #0x10
-  //    ldr    x10, [x11]
-  //    cmp    xmethod, x10
-  //    b.ne    search
-  // found_method:
-  //    ldr    w10, [x1,#oopDesc::klass_offset_in_bytes]
-  //    mov    xheapbase, #0x3c000000                //   #narrow_klass_base
-  //    movk    xheapbase, #0x3f7, lsl #32
-  //    add    x10, xheapbase, x10
-  //    mov    xheapbase, #0xe7ff0000                //   #heapbase
-  //    movk    xheapbase, #0x3f7, lsl #32
-  //    ldr    w11, [x10,#vtable_length_offset]
-  //    add    x11, x10, x11, uxtx #3
-  //    add    x11, x11, #itableMethodEntry::method_offset_in_bytes
-  //    add    x10, x10, #itentry_off
-  //    ldr    xmethod, [x11]
-  //    cmp    x0, xmethod
-  //    b.eq    found_method2
-  // search2:
-  //    cbz    xmethod, 0x000003ffa872e6cc
-  //    add    x11, x11, #0x10
-  //    ldr    xmethod, [x11]
-  //    cmp    x0, xmethod
-  //    b.ne    search2
-  // found_method2:
-  //    ldr    w11, [x11,#itableOffsetEntry::offset_offset_in_bytes]
-  //    ldr    xmethod, [x10,w11,uxtw]
-  //    ldr    xscratch1, [xmethod,#Method::from_compiled_offset]
-  //    br    xscratch1
-  // no_such_interface:
-  //    b      throw_ICCE_entry
-
+int VtableStub::pd_code_alignment() {
+  // aarch64 cache line size is not an architected constant. We just align on 4 bytes (instruction size).
+  const unsigned int icache_line_size = 4;
+  return icache_line_size;
 }
-
-int VtableStub::pd_code_alignment() { return 4; }
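
The bookkeeping() call that now ends each stub generator is defined in share/code/vtableStubs.cpp (that hunk is not shown in this excerpt). Judging only from the per-platform code it replaces, it centralizes the leftover-bytes report, the buffer-overflow guarantee, and set_exception_points(). A standalone sketch under that assumption, with stand-in types and a hypothetical helper name:

#include <cassert>
#include <cstdint>
#include <cstdio>

struct StubInfo {             // stand-in for the VtableStub/MacroAssembler state
  uintptr_t entry_point;
  uintptr_t code_end;         // entry_point + allocated stub size
  uintptr_t current_pc;       // where the assembler stopped emitting
};

// Hypothetical mirror of the shared helper; the real one also records the
// exception points (npe_addr/ame_addr) on the stub.
static void bookkeeping_sketch(const StubInfo& s, bool is_vtable_stub, int index,
                               int slop_bytes) {
  const char* kind = is_vtable_stub ? "vtable" : "itable";
  // same numbers the removed PrintMiscellaneous/WizardMode blocks used to print
  printf("%s #%d at %#lx[%d] left over: %d (measured slop: %d)\n",
         kind, index, (unsigned long)s.entry_point,
         (int)(s.code_end - s.entry_point),
         (int)(s.code_end - s.current_pc), slop_bytes);
  // "shut the door on sizing bugs": the generated code must fit its buffer
  assert(s.current_pc <= s.code_end && "overflowed buffer");
}

int main() {
  StubInfo s = { 0x1000, 0x1000 + 88, 0x1000 + 72 };  // hypothetical 88-byte stub
  bookkeeping_sketch(s, /*is_vtable_stub=*/true, /*index=*/5, /*slop_bytes=*/12);
  return 0;
}
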
--- a/src/hotspot/cpu/arm/vtableStubs_arm.cpp	Sat Sep 01 12:02:07 2018 -0700
+++ b/src/hotspot/cpu/arm/vtableStubs_arm.cpp	Mon Sep 03 09:43:08 2018 +0200
@@ -48,17 +48,31 @@
 #endif
 
 VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
-  const int code_length = VtableStub::pd_code_size_limit(true);
-  VtableStub* s = new(code_length) VtableStub(true, vtable_index);
+  // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
+  const int stub_code_length = code_size_limit(true);
+  VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index);
   // Can be NULL if there is no free space in the code cache.
   if (s == NULL) {
     return NULL;
   }
 
-  ResourceMark rm;
-  CodeBuffer cb(s->entry_point(), code_length);
+  // Count unused bytes in instruction sequences of variable size.
+  // We add them to the computed buffer size in order to avoid
+  // overflow in subsequently generated stubs.
+  address   start_pc;
+  int       slop_bytes = 0;
+  int       slop_delta = 0;
+
+  ResourceMark    rm;
+  CodeBuffer      cb(s->entry_point(), stub_code_length);
   MacroAssembler* masm = new MacroAssembler(&cb);
 
+#if (!defined(PRODUCT) && defined(COMPILER2))
+  if (CountCompiledCalls) {
+    // Implementation required?
+  }
+#endif
+
   assert(VtableStub::receiver_location() == R0->as_VMReg(), "receiver expected in R0");
 
   const Register tmp = Rtemp; // Rtemp OK, should be free at call sites
@@ -66,17 +80,33 @@
   address npe_addr = __ pc();
   __ load_klass(tmp, R0);
 
-  {
-  int entry_offset = in_bytes(Klass::vtable_start_offset()) + vtable_index * vtableEntry::size_in_bytes();
-  int method_offset = vtableEntry::method_offset_in_bytes() + entry_offset;
+#ifndef PRODUCT
+  if (DebugVtables) {
+    // Implementation required?
+  }
+#endif
+
+  start_pc = __ pc();
+  { // lookup virtual method
+    int entry_offset = in_bytes(Klass::vtable_start_offset()) + vtable_index * vtableEntry::size_in_bytes();
+    int method_offset = vtableEntry::method_offset_in_bytes() + entry_offset;
 
-  assert ((method_offset & (wordSize - 1)) == 0, "offset should be aligned");
-  int offset_mask = AARCH64_ONLY(0xfff << LogBytesPerWord) NOT_AARCH64(0xfff);
-  if (method_offset & ~offset_mask) {
-    __ add(tmp, tmp, method_offset & ~offset_mask);
+    assert ((method_offset & (wordSize - 1)) == 0, "offset should be aligned");
+    int offset_mask = AARCH64_ONLY(0xfff << LogBytesPerWord) NOT_AARCH64(0xfff);
+    if (method_offset & ~offset_mask) {
+      __ add(tmp, tmp, method_offset & ~offset_mask);
+    }
+    __ ldr(Rmethod, Address(tmp, method_offset & offset_mask));
   }
-  __ ldr(Rmethod, Address(tmp, method_offset & offset_mask));
+  slop_delta  = 8 - (int)(__ pc() - start_pc);
+  slop_bytes += slop_delta;
+  assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
+
+#ifndef PRODUCT
+  if (DebugVtables) {
+    // Implementation required?
   }
+#endif
 
   address ame_addr = __ pc();
 #ifdef AARCH64
@@ -87,35 +117,36 @@
 #endif // AARCH64
 
   masm->flush();
+  bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, 0);
 
-  if (PrintMiscellaneous && (WizardMode || Verbose)) {
-    tty->print_cr("vtable #%d at " PTR_FORMAT "[%d] left over: %d",
-                  vtable_index, p2i(s->entry_point()),
-                  (int)(s->code_end() - s->entry_point()),
-                  (int)(s->code_end() - __ pc()));
-  }
-  guarantee(__ pc() <= s->code_end(), "overflowed buffer");
-  // FIXME ARM: need correct 'slop' - below is x86 code
-  // shut the door on sizing bugs
-  //int slop = 8;  // 32-bit offset is this much larger than a 13-bit one
-  //assert(vtable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset");
-
-  s->set_exception_points(npe_addr, ame_addr);
   return s;
 }
 
 VtableStub* VtableStubs::create_itable_stub(int itable_index) {
-  const int code_length = VtableStub::pd_code_size_limit(false);
-  VtableStub* s = new(code_length) VtableStub(false, itable_index);
+  // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
+  const int stub_code_length = code_size_limit(false);
+  VtableStub* s = new(stub_code_length) VtableStub(false, itable_index);
   // Can be NULL if there is no free space in the code cache.
   if (s == NULL) {
     return NULL;
   }
+  // Count unused bytes in instruction sequences of variable size.
+  // We add them to the computed buffer size in order to avoid
+  // overflow in subsequently generated stubs.
+  address   start_pc;
+  int       slop_bytes = 0;
+  int       slop_delta = 0;
 
-  ResourceMark rm;
-  CodeBuffer cb(s->entry_point(), code_length);
+  ResourceMark    rm;
+  CodeBuffer      cb(s->entry_point(), stub_code_length);
   MacroAssembler* masm = new MacroAssembler(&cb);
 
+#if (!defined(PRODUCT) && defined(COMPILER2))
+  if (CountCompiledCalls) {
+    // Implementation required?
+  }
+#endif
+
   assert(VtableStub::receiver_location() == R0->as_VMReg(), "receiver expected in R0");
 
   // R0-R3 / R0-R7 registers hold the arguments and cannot be spoiled
@@ -123,15 +154,16 @@
   const Register Rintf   = AARCH64_ONLY(R10) NOT_AARCH64(R5);
   const Register Rscan   = AARCH64_ONLY(R11) NOT_AARCH64(R6);
 
+  Label L_no_such_interface;
+
   assert_different_registers(Ricklass, Rclass, Rintf, Rscan, Rtemp);
 
-  // Calculate the start of itable (itable goes after vtable)
-  const int scale = exact_log2(vtableEntry::size_in_bytes());
+  start_pc = __ pc();
+
+  // get receiver klass (also an implicit null-check)
   address npe_addr = __ pc();
   __ load_klass(Rclass, R0);
 
-  Label L_no_such_interface;
-
   // Receiver subtype check against REFC.
   __ ldr(Rintf, Address(Ricklass, CompiledICHolder::holder_klass_offset()));
   __ lookup_interface_method(// inputs: rec. class, interface, itable index
@@ -140,6 +172,9 @@
                              noreg, Rscan, Rtemp,
                              L_no_such_interface);
 
+  const ptrdiff_t  typecheckSize = __ pc() - start_pc;
+  start_pc = __ pc();
+
   // Get Method* and entry point for compiler
   __ ldr(Rintf, Address(Ricklass, CompiledICHolder::holder_metadata_offset()));
   __ lookup_interface_method(// inputs: rec. class, interface, itable index
@@ -148,6 +183,21 @@
                              Rmethod, Rscan, Rtemp,
                              L_no_such_interface);
 
+  const ptrdiff_t lookupSize = __ pc() - start_pc;
+
+  // Reduce "estimate" such that "padding" does not drop below 8.
+  const ptrdiff_t estimate = 140;
+  const ptrdiff_t codesize = typecheckSize + lookupSize;
+  slop_delta  = (int)(estimate - codesize);
+  slop_bytes += slop_delta;
+  assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize);
+
+#ifndef PRODUCT
+  if (DebugVtables) {
+    // Implementation required?
+  }
+#endif
+
   address ame_addr = __ pc();
 
 #ifdef AARCH64
@@ -158,7 +208,6 @@
 #endif // AARCH64
 
   __ bind(L_no_such_interface);
-
   // Handle IncompatibleClassChangeError in itable stubs.
   // More detailed error message.
   // We force resolving of the call site by jumping to the "handle
@@ -168,43 +217,13 @@
   __ jump(SharedRuntime::get_handle_wrong_method_stub(), relocInfo::runtime_call_type, Rtemp);
 
   masm->flush();
+  bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0);
 
-  if (PrintMiscellaneous && (WizardMode || Verbose)) {
-    tty->print_cr("itable #%d at " PTR_FORMAT "[%d] left over: %d",
-                  itable_index, p2i(s->entry_point()),
-                  (int)(s->code_end() - s->entry_point()),
-                  (int)(s->code_end() - __ pc()));
-  }
-  guarantee(__ pc() <= s->code_end(), "overflowed buffer");
-  // FIXME ARM: need correct 'slop' - below is x86 code
-  // shut the door on sizing bugs
-  //int slop = 8;  // 32-bit offset is this much larger than a 13-bit one
-  //assert(itable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset");
-
-  s->set_exception_points(npe_addr, ame_addr);
   return s;
 }
 
-int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
-  int instr_count;
-
-  if (is_vtable_stub) {
-    // vtable stub size
-    instr_count = NOT_AARCH64(4) AARCH64_ONLY(5);
-  } else {
-    // itable stub size
-    instr_count = NOT_AARCH64(31) AARCH64_ONLY(31);
-  }
-
-#ifdef AARCH64
-  if (UseCompressedClassPointers) {
-    instr_count += MacroAssembler::instr_count_for_decode_klass_not_null();
-  }
-#endif // AARCH64
-
-  return instr_count * Assembler::InstructionSize;
+int VtableStub::pd_code_alignment() {
+  // ARM32 cache line size is not an architected constant. We just align on word size.
+  const unsigned int icache_line_size = wordSize;
+  return icache_line_size;
 }
-
-int VtableStub::pd_code_alignment() {
-  return 8;
-}
--- a/src/hotspot/cpu/ppc/vtableStubs_ppc_64.cpp	Sat Sep 01 12:02:07 2018 -0700
+++ b/src/hotspot/cpu/ppc/vtableStubs_ppc_64.cpp	Mon Sep 03 09:43:08 2018 +0200
@@ -39,36 +39,39 @@
 
 #define __ masm->
 
-#ifdef PRODUCT
-#define BLOCK_COMMENT(str) // nothing
-#else
-#define BLOCK_COMMENT(str) __ block_comment(str)
-#endif
-#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
-
 #ifndef PRODUCT
 extern "C" void bad_compiled_vtable_index(JavaThread* thread, oopDesc* receiver, int index);
 #endif
 
-// Used by compiler only; may use only caller saved, non-argument
-// registers.
+// Used by compiler only; may use only caller saved, non-argument registers.
 VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
-  // PPC port: use fixed size.
-  const int code_length = VtableStub::pd_code_size_limit(true);
-  VtableStub* s = new (code_length) VtableStub(true, vtable_index);
-
+  // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
+  const int stub_code_length = code_size_limit(true);
+  VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index);
   // Can be NULL if there is no free space in the code cache.
   if (s == NULL) {
     return NULL;
   }
 
-  ResourceMark rm;
-  CodeBuffer cb(s->entry_point(), code_length);
+  // Count unused bytes in instruction sequences of variable size.
+  // We add them to the computed buffer size in order to avoid
+  // overflow in subsequently generated stubs.
+  address   start_pc;
+  int       slop_bytes = 8; // just a two-instruction safety net
+  int       slop_delta = 0;
+
+  ResourceMark    rm;
+  CodeBuffer      cb(s->entry_point(), stub_code_length);
   MacroAssembler* masm = new MacroAssembler(&cb);
 
-#ifndef PRODUCT
+#if (!defined(PRODUCT) && defined(COMPILER2))
   if (CountCompiledCalls) {
+    start_pc = __ pc();
+    int load_const_maxLen = 5*BytesPerInstWord;  // load_const generates 5 instructions. Assume that as max size for load_const_optimized
     int offs = __ load_const_optimized(R11_scratch1, SharedRuntime::nof_megamorphic_calls_addr(), R12_scratch2, true);
+    slop_delta  = load_const_maxLen - (__ pc() - start_pc);
+    slop_bytes += slop_delta;
+    assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
     __ lwz(R12_scratch2, offs, R11_scratch1);
     __ addi(R12_scratch2, R12_scratch2, 1);
     __ stw(R12_scratch2, offs, R11_scratch1);
@@ -77,17 +80,13 @@
 
   assert(VtableStub::receiver_location() == R3_ARG1->as_VMReg(), "receiver expected in R3_ARG1");
 
-  // Get receiver klass.
   const Register rcvr_klass = R11_scratch1;
-
-  // We might implicit NULL fault here.
   address npe_addr = __ pc(); // npe = null pointer exception
+  // check if we must do an explicit check (implicit checks disabled, offset too large).
   __ null_check(R3, oopDesc::klass_offset_in_bytes(), /*implicit only*/NULL);
+  // Get receiver klass.
   __ load_klass(rcvr_klass, R3);
 
- // Set method (in case of interpreted method), and destination address.
-  int entry_offset = in_bytes(Klass::vtable_start_offset()) + vtable_index*vtableEntry::size_in_bytes();
-
 #ifndef PRODUCT
   if (DebugVtables) {
     Label L;
@@ -102,7 +101,9 @@
   }
 #endif
 
-  int v_off = entry_offset + vtableEntry::method_offset_in_bytes();
+  int entry_offset = in_bytes(Klass::vtable_start_offset()) +
+                     vtable_index*vtableEntry::size_in_bytes();
+  int v_off        = entry_offset + vtableEntry::method_offset_in_bytes();
 
   __ ld(R19_method, (RegisterOrConstant)v_off, rcvr_klass);
 
@@ -116,40 +117,48 @@
   }
 #endif
 
-  // If the vtable entry is null, the method is abstract.
   address ame_addr = __ pc(); // ame = abstract method error
+                              // if the vtable entry is null, the method is abstract
+                              // NOTE: for vtable dispatches, the vtable entry will never be null.
+
   __ null_check(R19_method, in_bytes(Method::from_compiled_offset()), /*implicit only*/NULL);
   __ ld(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method);
   __ mtctr(R12_scratch2);
   __ bctr();
 
   masm->flush();
-
-  guarantee(__ pc() <= s->code_end(), "overflowed buffer");
-
-  s->set_exception_points(npe_addr, ame_addr);
+  bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, 0);
 
   return s;
 }
 
 VtableStub* VtableStubs::create_itable_stub(int itable_index) {
-  // PPC port: use fixed size.
-  const int code_length = VtableStub::pd_code_size_limit(false);
-  VtableStub* s = new (code_length) VtableStub(false, itable_index);
-
+  // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
+  const int stub_code_length = code_size_limit(false);
+  VtableStub* s = new(stub_code_length) VtableStub(false, itable_index);
   // Can be NULL if there is no free space in the code cache.
   if (s == NULL) {
     return NULL;
   }
+  // Count unused bytes in instruction sequences of variable size.
+  // We add them to the computed buffer size in order to avoid
+  // overflow in subsequently generated stubs.
+  address   start_pc;
+  int       slop_bytes = 8; // just a two-instruction safety net
+  int       slop_delta = 0;
 
-  ResourceMark rm;
-  CodeBuffer cb(s->entry_point(), code_length);
+  ResourceMark    rm;
+  CodeBuffer      cb(s->entry_point(), stub_code_length);
   MacroAssembler* masm = new MacroAssembler(&cb);
-  address start_pc;
+  int             load_const_maxLen = 5*BytesPerInstWord;  // load_const generates 5 instructions. Assume that as max size for load_const_optimized
 
-#ifndef PRODUCT
+#if (!defined(PRODUCT) && defined(COMPILER2))
   if (CountCompiledCalls) {
+    start_pc = __ pc();
     int offs = __ load_const_optimized(R11_scratch1, SharedRuntime::nof_megamorphic_calls_addr(), R12_scratch2, true);
+    slop_delta  = load_const_maxLen - (__ pc() - start_pc);
+    slop_bytes += slop_delta;
+    assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
     __ lwz(R12_scratch2, offs, R11_scratch1);
     __ addi(R12_scratch2, R12_scratch2, 1);
     __ stw(R12_scratch2, offs, R11_scratch1);
@@ -209,33 +218,22 @@
   // wrong method" stub, and so let the interpreter runtime do all the
   // dirty work.
   __ bind(L_no_such_interface);
+  start_pc = __ pc();
   __ load_const_optimized(R11_scratch1, SharedRuntime::get_handle_wrong_method_stub(), R12_scratch2);
+  slop_delta  = load_const_maxLen - (__ pc() - start_pc);
+  slop_bytes += slop_delta;
+  assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
   __ mtctr(R11_scratch1);
   __ bctr();
 
   masm->flush();
+  bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0);
 
-  guarantee(__ pc() <= s->code_end(), "overflowed buffer");
-
-  s->set_exception_points(npe_addr, ame_addr);
   return s;
 }
 
-int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
-  if (DebugVtables || CountCompiledCalls || VerifyOops) {
-    return 1000;
-  }
-  int size = is_vtable_stub ? 20 + 8 : 164 + 20; // Plain + safety
-  if (UseCompressedClassPointers) {
-    size += MacroAssembler::instr_size_for_decode_klass_not_null();
-  }
-  if (!ImplicitNullChecks || !os::zero_page_read_protected()) {
-    size += is_vtable_stub ? 8 : 12;
-  }
-  return size;
-}
-
 int VtableStub::pd_code_alignment() {
+  // Power cache line size is 128 bytes, but we want to limit alignment loss.
   const unsigned int icache_line_size = 32;
   return icache_line_size;
 }
--- a/src/hotspot/cpu/s390/vtableStubs_s390.cpp	Sat Sep 01 12:02:07 2018 -0700
+++ b/src/hotspot/cpu/s390/vtableStubs_s390.cpp	Mon Sep 03 09:43:08 2018 +0200
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2016, 2017 SAP SE. All rights reserved.
+ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2018 SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -37,9 +37,6 @@
 #include "opto/runtime.hpp"
 #endif
 
-// Machine-dependent part of VtableStubs: create vtableStub of correct
-// size and initialize its code.
-
 #define __ masm->
 
 #ifndef PRODUCT
@@ -48,123 +45,140 @@
 
 // Used by compiler only; may use only caller saved, non-argument registers.
 VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
-
-  const int   code_length = VtableStub::pd_code_size_limit(true);
-  VtableStub *s = new(code_length) VtableStub(true, vtable_index);
-  if (s == NULL) { // Indicates OOM In the code cache.
+  // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
+  const int stub_code_length = code_size_limit(true);
+  VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index);
+  // Can be NULL if there is no free space in the code cache.
+  if (s == NULL) {
     return NULL;
   }
 
+  // Count unused bytes in instruction sequences of variable size.
+  // We add them to the computed buffer size in order to avoid
+  // overflow in subsequently generated stubs.
+  address   start_pc;
+  int       slop_bytes = 0;
+  int       slop_delta = 0;
+
   ResourceMark    rm;
-  CodeBuffer      cb(s->entry_point(), code_length);
-  MacroAssembler *masm = new MacroAssembler(&cb);
-  int     padding_bytes = 0;
+  CodeBuffer      cb(s->entry_point(), stub_code_length);
+  MacroAssembler* masm = new MacroAssembler(&cb);
 
 #if (!defined(PRODUCT) && defined(COMPILER2))
   if (CountCompiledCalls) {
-    // Count unused bytes
-    //                  worst case             actual size
-    padding_bytes += __ load_const_size() - __ load_const_optimized_rtn_len(Z_R1_scratch, (long)SharedRuntime::nof_megamorphic_calls_addr(), true);
-
+    //               worst case             actual size
+    slop_delta  = __ load_const_size() - __ load_const_optimized_rtn_len(Z_R1_scratch, (long)SharedRuntime::nof_megamorphic_calls_addr(), true);
+    slop_bytes += slop_delta;
+    assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
     // Use generic emitter for direct memory increment.
     // Abuse Z_method as scratch register for generic emitter.
     // It is loaded further down anyway before it is first used.
+    // No dynamic code size variance here, increment is 1, always.
     __ add2mem_32(Address(Z_R1_scratch), 1, Z_method);
   }
 #endif
 
   assert(VtableStub::receiver_location() == Z_R2->as_VMReg(), "receiver expected in Z_ARG1");
 
-  // Get receiver klass.
-  // Must do an explicit check if implicit checks are disabled.
-  address npe_addr = __ pc(); // npe == NULL ptr exception
+  const Register rcvr_klass   = Z_R1_scratch;
+  address        npe_addr     = __ pc(); // npe == NULL ptr exception
+  // check if we must do an explicit check (implicit checks disabled, offset too large).
   __ null_check(Z_ARG1, Z_R1_scratch, oopDesc::klass_offset_in_bytes());
-  const Register rcvr_klass = Z_R1_scratch;
+  // Get receiver klass.
   __ load_klass(rcvr_klass, Z_ARG1);
 
-  // Set method (in case of interpreted method), and destination address.
-  int entry_offset = in_bytes(Klass::vtable_start_offset()) +
-                     vtable_index * vtableEntry::size_in_bytes();
-
 #ifndef PRODUCT
   if (DebugVtables) {
-    Label L;
+    NearLabel L;
     // Check offset vs vtable length.
     const Register vtable_idx = Z_R0_scratch;
 
-    // Count unused bytes.
-    //                  worst case             actual size
-    padding_bytes += __ load_const_size() - __ load_const_optimized_rtn_len(vtable_idx, vtable_index*vtableEntry::size_in_bytes(), true);
+    //               worst case             actual size
+    slop_delta  = __ load_const_size() - __ load_const_optimized_rtn_len(vtable_idx, vtable_index*vtableEntry::size(), true);
+    slop_bytes += slop_delta;
+    assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
 
-    assert(Immediate::is_uimm12(in_bytes(Klass::vtable_length_offset())), "disp to large");
+    assert(Displacement::is_shortDisp(in_bytes(Klass::vtable_length_offset())), "disp too large");
     __ z_cl(vtable_idx, in_bytes(Klass::vtable_length_offset()), rcvr_klass);
     __ z_brl(L);
     __ z_lghi(Z_ARG3, vtable_index);  // Debug code, don't optimize.
     __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), Z_ARG1, Z_ARG3, false);
     // Count unused bytes (assume worst case here).
-    padding_bytes += 12;
+    slop_bytes += 12;
     __ bind(L);
   }
 #endif
 
-  int v_off = entry_offset + vtableEntry::method_offset_in_bytes();
+  int entry_offset = in_bytes(Klass::vtable_start_offset()) +
+                     vtable_index * vtableEntry::size_in_bytes();
+  int v_off        = entry_offset + vtableEntry::method_offset_in_bytes();
 
+  // Set method (in case of interpreted method), and destination address.
   // Duplicate safety code from enc_class Java_Dynamic_Call_dynTOC.
   if (Displacement::is_validDisp(v_off)) {
     __ z_lg(Z_method/*method oop*/, v_off, rcvr_klass/*class oop*/);
     // Account for the load_const in the else path.
-    padding_bytes += __ load_const_size();
+    slop_delta  = __ load_const_size();
   } else {
     // Worse case, offset does not fit in displacement field.
-    __ load_const(Z_method, v_off); // Z_method temporarily holds the offset value.
+    //               worst case             actual size
+    slop_delta  = __ load_const_size() - __ load_const_optimized_rtn_len(Z_method, v_off, true);
     __ z_lg(Z_method/*method oop*/, 0, Z_method/*method offset*/, rcvr_klass/*class oop*/);
   }
+  slop_bytes += slop_delta;
 
 #ifndef PRODUCT
   if (DebugVtables) {
-    Label L;
+    NearLabel L;
     __ z_ltgr(Z_method, Z_method);
     __ z_brne(L);
-    __ stop("Vtable entry is ZERO",102);
+    __ stop("Vtable entry is ZERO", 102);
     __ bind(L);
   }
 #endif
 
-  address ame_addr = __ pc(); // ame = abstract method error
-
-  // Must do an explicit check if implicit checks are disabled.
+  // Must do an explicit check if offset too large or implicit checks are disabled.
+  address ame_addr = __ pc();
   __ null_check(Z_method, Z_R1_scratch, in_bytes(Method::from_compiled_offset()));
   __ z_lg(Z_R1_scratch, in_bytes(Method::from_compiled_offset()), Z_method);
   __ z_br(Z_R1_scratch);
 
   masm->flush();
-
-  s->set_exception_points(npe_addr, ame_addr);
+  bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, 0);
 
   return s;
 }
 
 VtableStub* VtableStubs::create_itable_stub(int itable_index) {
-  const int   code_length = VtableStub::pd_code_size_limit(false);
-  VtableStub *s = new(code_length) VtableStub(false, itable_index);
-  if (s == NULL) { // Indicates OOM in the code cache.
+  // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
+  const int stub_code_length = code_size_limit(false);
+  VtableStub* s = new(stub_code_length) VtableStub(false, itable_index);
+  // Can be NULL if there is no free space in the code cache.
+  if (s == NULL) {
     return NULL;
   }
+  // Count unused bytes in instruction sequences of variable size.
+  // We add them to the computed buffer size in order to avoid
+  // overflow in subsequently generated stubs.
+  address   start_pc;
+  int       slop_bytes = 0;
+  int       slop_delta = 0;
 
   ResourceMark    rm;
-  CodeBuffer      cb(s->entry_point(), code_length);
-  MacroAssembler *masm = new MacroAssembler(&cb);
-  int     padding_bytes = 0;
+  CodeBuffer      cb(s->entry_point(), stub_code_length);
+  MacroAssembler* masm = new MacroAssembler(&cb);
 
 #if (!defined(PRODUCT) && defined(COMPILER2))
   if (CountCompiledCalls) {
-    // Count unused bytes
-    //                  worst case             actual size
-    padding_bytes += __ load_const_size() - __ load_const_optimized_rtn_len(Z_R1_scratch, (long)SharedRuntime::nof_megamorphic_calls_addr(), true);
-
+    //               worst case             actual size
+    slop_delta  = __ load_const_size() - __ load_const_optimized_rtn_len(Z_R1_scratch, (long)SharedRuntime::nof_megamorphic_calls_addr(), true);
+    slop_bytes += slop_delta;
+    assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
     // Use generic emitter for direct memory increment.
-    // Use Z_tmp_1 as scratch register for generic emitter.
-    __ add2mem_32((Z_R1_scratch), 1, Z_tmp_1);
+    // Abuse Z_method as scratch register for generic emitter.
+    // It is loaded further down anyway before it is first used.
+    // No dynamic code size variance here, increment is 1, always.
+    __ add2mem_32(Address(Z_R1_scratch), 1, Z_method);
   }
 #endif
 
@@ -178,7 +192,7 @@
                  interface  = Z_tmp_2;
 
   // Get receiver klass.
-  // Must do an explicit check if implicit checks are disabled.
+  // Must do an explicit check if offset too large or implicit checks are disabled.
   address npe_addr = __ pc(); // npe == NULL ptr exception
   __ null_check(Z_ARG1, Z_R1_scratch, oopDesc::klass_offset_in_bytes());
   __ load_klass(rcvr_klass, Z_ARG1);
@@ -195,10 +209,10 @@
 
 #ifndef PRODUCT
   if (DebugVtables) {
-    Label ok1;
+    NearLabel ok1;
     __ z_ltgr(Z_method, Z_method);
     __ z_brne(ok1);
-    __ stop("method is null",103);
+    __ stop("method is null", 103);
     __ bind(ok1);
   }
 #endif
@@ -213,39 +227,24 @@
 
   // Handle IncompatibleClassChangeError in itable stubs.
   __ bind(no_such_interface);
-  // Count unused bytes
-  //                  worst case          actual size
-  // We force resolving of the call site by jumping to
-  // the "handle wrong method" stub, and so let the
+  // more detailed IncompatibleClassChangeError
+  // we force re-resolving of the call site by jumping to
+  // the "handle wrong method" stub, thus letting the
   // interpreter runtime do all the dirty work.
-  padding_bytes += __ load_const_size() - __ load_const_optimized_rtn_len(Z_R1_scratch, (long)SharedRuntime::get_handle_wrong_method_stub(), true);
+  //               worst case          actual size
+  slop_delta  = __ load_const_size() - __ load_const_optimized_rtn_len(Z_R1_scratch, (long)SharedRuntime::get_handle_wrong_method_stub(), true);
+  slop_bytes += slop_delta;
+  assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
   __ z_br(Z_R1_scratch);
 
   masm->flush();
+  bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0);
 
-  s->set_exception_points(npe_addr, ame_addr);
   return s;
 }
 
-// In order to tune these parameters, run the JVM with VM options
-// +PrintMiscellaneous and +WizardMode to see information about
-// actual itable stubs. Run it with -Xmx31G -XX:+UseCompressedOops.
-int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
-  int size = DebugVtables ? 216 : 0;
-  if (CountCompiledCalls) {
-    size += 6 * 4;
-  }
-  size += is_vtable_stub ? 36 : 140;
-  if (UseCompressedClassPointers) {
-    size += MacroAssembler::instr_size_for_decode_klass_not_null();
-  }
-  if (!ImplicitNullChecks) {
-    size += 36;
-  }
-  return size;
-}
-
 int VtableStub::pd_code_alignment() {
+  // System z cache line size is 256 bytes, but octoword-alignment is quite ok.
   const unsigned int icache_line_size = 32;
   return icache_line_size;
 }
--- a/src/hotspot/cpu/sparc/vtableStubs_sparc.cpp	Sat Sep 01 12:02:07 2018 -0700
+++ b/src/hotspot/cpu/sparc/vtableStubs_sparc.cpp	Mon Sep 03 09:43:08 2018 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -41,32 +41,38 @@
 
 #define __ masm->
 
-
 #ifndef PRODUCT
 extern "C" void bad_compiled_vtable_index(JavaThread* thread, oopDesc* receiver, int index);
 #endif
 
 
 // Used by compiler only; may use only caller saved, non-argument registers
-// NOTE:  %%%% if any change is made to this stub make sure that the function
-//             pd_code_size_limit is changed to ensure the correct size for VtableStub
 VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
-  const int sparc_code_length = VtableStub::pd_code_size_limit(true);
-  VtableStub* s = new(sparc_code_length) VtableStub(true, vtable_index);
+  // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
+  const int stub_code_length = code_size_limit(true);
+  VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index);
   // Can be NULL if there is no free space in the code cache.
   if (s == NULL) {
     return NULL;
   }
 
-  ResourceMark rm;
-  CodeBuffer cb(s->entry_point(), sparc_code_length);
+  // Count unused bytes in instruction sequences of variable size.
+  // We add them to the computed buffer size in order to avoid
+  // overflow in subsequently generated stubs.
+  address   start_pc;
+  int       slop_bytes = 0;
+  int       slop_delta = 0;
+  const int index_dependent_slop     = ((vtable_index < 512) ? 2 : 0)*BytesPerInstWord; // code size change with transition from 13-bit to 32-bit constant (@index == 512?).
+
+  ResourceMark    rm;
+  CodeBuffer      cb(s->entry_point(), stub_code_length);
   MacroAssembler* masm = new MacroAssembler(&cb);
 
-#ifndef PRODUCT
+#if (!defined(PRODUCT) && defined(COMPILER2))
   if (CountCompiledCalls) {
     __ inc_counter(SharedRuntime::nof_megamorphic_calls_addr(), G5, G3_scratch);
   }
-#endif /* PRODUCT */
+#endif // PRODUCT
 
   assert(VtableStub::receiver_location() == O0->as_VMReg(), "receiver expected in O0");
 
@@ -74,20 +80,33 @@
   address npe_addr = __ pc();
   __ load_klass(O0, G3_scratch);
 
-  // set Method* (in case of interpreted method), and destination address
 #ifndef PRODUCT
   if (DebugVtables) {
     Label L;
     // check offset vs vtable length
     __ ld(G3_scratch, in_bytes(Klass::vtable_length_offset()), G5);
     __ cmp_and_br_short(G5, vtable_index*vtableEntry::size(), Assembler::greaterUnsigned, Assembler::pt, L);
+
+    // set generates 8 instructions (worst case), 1 instruction (best case)
+    start_pc = __ pc();
     __ set(vtable_index, O2);
+    slop_delta  = __ worst_case_insts_for_set()*BytesPerInstWord - (__ pc() - start_pc);
+    slop_bytes += slop_delta;
+    assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
+
+    // there is no variance in call_VM() emitted code.
     __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), O0, O2);
     __ bind(L);
   }
 #endif
 
+  // set Method* (in case of interpreted method), and destination address
+  start_pc = __ pc();
   __ lookup_virtual_method(G3_scratch, vtable_index, G5_method);
+  // lookup_virtual_method generates 3 instructions (worst case), 1 instruction (best case)
+  slop_delta  = 3*BytesPerInstWord - (int)(__ pc() - start_pc);
+  slop_bytes += slop_delta;
+  assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
 
 #ifndef PRODUCT
   if (DebugVtables) {
@@ -109,37 +128,41 @@
   __ delayed()->nop();
 
   masm->flush();
+  slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets
+  bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, index_dependent_slop);
 
-  if (PrintMiscellaneous && (WizardMode || Verbose)) {
-    tty->print_cr("vtable #%d at " PTR_FORMAT "[%d] left over: %d",
-                  vtable_index, p2i(s->entry_point()),
-                  (int)(s->code_end() - s->entry_point()),
-                  (int)(s->code_end() - __ pc()));
-  }
-  guarantee(__ pc() <= s->code_end(), "overflowed buffer");
-  // shut the door on sizing bugs
-  int slop = 2*BytesPerInstWord;  // 32-bit offset is this much larger than a 13-bit one
-  assert(vtable_index > 10 || __ pc() + slop <= s->code_end(), "room for sethi;add");
-
-  s->set_exception_points(npe_addr, ame_addr);
   return s;
 }
 
 
-// NOTE:  %%%% if any change is made to this stub make sure that the function
-//             pd_code_size_limit is changed to ensure the correct size for VtableStub
 VtableStub* VtableStubs::create_itable_stub(int itable_index) {
-  const int sparc_code_length = VtableStub::pd_code_size_limit(false);
-  VtableStub* s = new(sparc_code_length) VtableStub(false, itable_index);
+  // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
+  const int stub_code_length = code_size_limit(false);
+  VtableStub* s = new(stub_code_length) VtableStub(false, itable_index);
   // Can be NULL if there is no free space in the code cache.
   if (s == NULL) {
     return NULL;
   }
+  // Count unused bytes in instruction sequences of variable size.
+  // We add them to the computed buffer size in order to avoid
+  // overflow in subsequently generated stubs.
+  address   start_pc;
+  int       slop_bytes = 0;
+  int       slop_delta = 0;
+  const int index_dependent_slop     = ((itable_index < 512) ? 2 : 0)*BytesPerInstWord; // code size change with transition from 13-bit to 32-bit constant (@index == 512?).
 
-  ResourceMark rm;
-  CodeBuffer cb(s->entry_point(), sparc_code_length);
+  ResourceMark    rm;
+  CodeBuffer      cb(s->entry_point(), stub_code_length);
   MacroAssembler* masm = new MacroAssembler(&cb);
 
+#if (!defined(PRODUCT) && defined(COMPILER2))
+  if (CountCompiledCalls) {
+//  Use G3_scratch, G4_scratch as work regs for inc_counter.
+//  These are defined before use further down.
+    __ inc_counter(SharedRuntime::nof_megamorphic_calls_addr(), G3_scratch, G4_scratch);
+  }
+#endif // PRODUCT
+
   Register G3_Klass = G3_scratch;
   Register G5_icholder = G5;  // Passed in as an argument
   Register G4_interface = G4_scratch;
@@ -160,15 +183,10 @@
   // and so those registers are not available here.
   __ save(SP,-frame::register_save_words*wordSize,SP);
 
-#ifndef PRODUCT
-  if (CountCompiledCalls) {
-    __ inc_counter(SharedRuntime::nof_megamorphic_calls_addr(), L0, L1);
-  }
-#endif /* PRODUCT */
+  Label    L_no_such_interface;
+  Register L5_method = L5;
 
-  Label L_no_such_interface;
-
-  Register L5_method = L5;
+  start_pc = __ pc();
 
   // Receiver subtype check against REFC.
   __ ld_ptr(G5_icholder, CompiledICHolder::holder_klass_offset(), G4_interface);
@@ -179,6 +197,9 @@
                              L_no_such_interface,
                              /*return_method=*/ false);
 
+  const ptrdiff_t typecheckSize = __ pc() - start_pc;
+  start_pc = __ pc();
+
   // Get Method* and entrypoint for compiler
   __ ld_ptr(G5_icholder, CompiledICHolder::holder_metadata_offset(), G4_interface);
   __ lookup_interface_method(// inputs: rec. class, interface, itable index
@@ -187,6 +208,19 @@
                              L5_method, L2, L3,
                              L_no_such_interface);
 
+  const ptrdiff_t lookupSize = __ pc() - start_pc;
+
+  // Reduce "estimate" such that "padding" does not drop below 8.
+  // Do not target a left-over number of zero, because a very
+  // large vtable or itable offset (> 4K) will require an extra
+  // sethi/or pair of instructions.
+  // Found typecheck(60) + lookup(72) to exceed previous estimate (32*4).
+  const ptrdiff_t estimate = 36*BytesPerInstWord;
+  const ptrdiff_t codesize = typecheckSize + lookupSize + index_dependent_slop;
+  slop_delta  = (int)(estimate - codesize);
+  slop_bytes += slop_delta;
+  assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize);
+
 #ifndef PRODUCT
   if (DebugVtables) {
     Label L01;
@@ -222,88 +256,12 @@
   __ delayed()->restore();
 
   masm->flush();
+  slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets
+  bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, index_dependent_slop);
 
-  if (PrintMiscellaneous && (WizardMode || Verbose)) {
-    tty->print_cr("itable #%d at " PTR_FORMAT "[%d] left over: %d",
-                  itable_index, p2i(s->entry_point()),
-                  (int)(s->code_end() - s->entry_point()),
-                  (int)(s->code_end() - __ pc()));
-  }
-  guarantee(__ pc() <= s->code_end(), "overflowed buffer");
-  // shut the door on sizing bugs
-  int slop = 2*BytesPerInstWord;  // 32-bit offset is this much larger than a 13-bit one
-  assert(itable_index > 10 || __ pc() + slop <= s->code_end(), "room for sethi;add");
-
-  s->set_exception_points(npe_addr, ame_addr);
   return s;
 }
 
-
-int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
-  if (DebugVtables || CountCompiledCalls || VerifyOops) return 1000;
-  else {
-    const int slop = 2*BytesPerInstWord; // sethi;add  (needed for long offsets)
-    if (is_vtable_stub) {
-      // ld;ld;ld,jmp,nop
-      const int basic = 5*BytesPerInstWord +
-                        // shift;add for load_klass (only shift with zero heap based)
-                        (UseCompressedClassPointers ?
-                          MacroAssembler::instr_size_for_decode_klass_not_null() : 0);
-      return basic + slop;
-    } else {
-      const int basic = 54 * BytesPerInstWord +
-                        // shift;add for load_klass (only shift with zero heap based)
-                        (UseCompressedClassPointers ?
-                          MacroAssembler::instr_size_for_decode_klass_not_null() : 0);
-      return (basic + slop);
-    }
-  }
-
-  // In order to tune these parameters, run the JVM with VM options
-  // +PrintMiscellaneous and +WizardMode to see information about
-  // actual itable stubs.  Look for lines like this:
-  //   itable #1 at 0x5551212[116] left over: 8
-  // Reduce the constants so that the "left over" number is 8
-  // Do not aim at a left-over number of zero, because a very
-  // large vtable or itable offset (> 4K) will require an extra
-  // sethi/or pair of instructions.
-  //
-  // The JVM98 app. _202_jess has a megamorphic interface call.
-  // The itable code looks like this:
-  // Decoding VtableStub itbl[1]@16
-  //   ld  [ %o0 + 4 ], %g3
-  //   save  %sp, -64, %sp
-  //   ld  [ %g3 + 0xe8 ], %l2
-  //   sll  %l2, 2, %l2
-  //   add  %l2, 0x134, %l2
-  //   add  %g3, %l2, %l2
-  //   add  %g3, 4, %g3
-  //   ld  [ %l2 ], %l5
-  //   brz,pn   %l5, throw_icce
-  //   cmp  %l5, %g5
-  //   be  %icc, success
-  //   add  %l2, 8, %l2
-  // loop:
-  //   ld  [ %l2 ], %l5
-  //   brz,pn   %l5, throw_icce
-  //   cmp  %l5, %g5
-  //   bne,pn   %icc, loop
-  //   add  %l2, 8, %l2
-  // success:
-  //   ld  [ %l2 + -4 ], %l2
-  //   ld  [ %g3 + %l2 ], %l5
-  //   restore  %l5, 0, %g5
-  //   ld  [ %g5 + 0x44 ], %g3
-  //   jmp  %g3
-  //   nop
-  // throw_icce:
-  //   sethi  %hi(throw_ICCE_entry), %g3
-  //   ! 5 more instructions here, LP64_ONLY
-  //   jmp  %g3 + %lo(throw_ICCE_entry)
-  //   restore
-}
-
-
 int VtableStub::pd_code_alignment() {
   // UltraSPARC cache line size is 8 instructions:
   const unsigned int icache_line_size = 32;
--- a/src/hotspot/cpu/x86/vtableStubs_x86_32.cpp	Sat Sep 01 12:02:07 2018 -0700
+++ b/src/hotspot/cpu/x86/vtableStubs_x86_32.cpp	Mon Sep 03 09:43:08 2018 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -55,25 +55,34 @@
 // Available now, but may become callee-save at some point:
 //   rsi, rdi
 // Note that rax and rdx are also used for return values.
-//
+
 VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
-  const int i486_code_length = VtableStub::pd_code_size_limit(true);
-  VtableStub* s = new(i486_code_length) VtableStub(true, vtable_index);
+  // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
+  const int stub_code_length = code_size_limit(true);
+  VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index);
   // Can be NULL if there is no free space in the code cache.
   if (s == NULL) {
     return NULL;
   }
 
-  ResourceMark rm;
-  CodeBuffer cb(s->entry_point(), i486_code_length);
+  // Count unused bytes in instruction sequences of variable size.
+  // We add them to the computed buffer size in order to avoid
+  // overflow in subsequently generated stubs.
+  address   start_pc;
+  int       slop_bytes = 0;
+  int       slop_delta = 0;
+  // No variance was detected in vtable stub sizes. Setting index_dependent_slop == 0 will unveil any deviation from this observation.
+  const int index_dependent_slop     = 0;
+
+  ResourceMark    rm;
+  CodeBuffer      cb(s->entry_point(), stub_code_length);
   MacroAssembler* masm = new MacroAssembler(&cb);
 
-#ifndef PRODUCT
-
+#if (!defined(PRODUCT) && defined(COMPILER2))
   if (CountCompiledCalls) {
     __ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
   }
-#endif /* PRODUCT */
+#endif
 
   // get receiver (need to skip return address on top of stack)
   assert(VtableStub::receiver_location() == rcx->as_VMReg(), "receiver expected in rcx");
@@ -85,11 +94,21 @@
 #ifndef PRODUCT
   if (DebugVtables) {
     Label L;
+    start_pc = __ pc();
     // check offset vs vtable length
     __ cmpl(Address(rax, Klass::vtable_length_offset()), vtable_index*vtableEntry::size());
+    slop_delta  = 6 - (__ pc() - start_pc);  // cmpl varies in length, depending on data
+    slop_bytes += slop_delta;
+    assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
+
     __ jcc(Assembler::greater, L);
     __ movl(rbx, vtable_index);
+    // VTABLE TODO: find upper bound for call_VM length.
+    start_pc = __ pc();
     __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), rcx, rbx);
+    slop_delta  = 480 - (__ pc() - start_pc);
+    slop_bytes += slop_delta;
+    assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
     __ bind(L);
   }
 #endif // PRODUCT
@@ -97,8 +116,13 @@
   const Register method = rbx;
 
   // load Method* and target address
+  start_pc = __ pc();
   __ lookup_virtual_method(rax, vtable_index, method);
+  slop_delta  = 6 - (int)(__ pc() - start_pc);
+  slop_bytes += slop_delta;
+  assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
 
+#ifndef PRODUCT
   if (DebugVtables) {
     Label L;
     __ cmpptr(method, (int32_t)NULL_WORD);
@@ -108,55 +132,53 @@
     __ stop("Vtable entry is NULL");
     __ bind(L);
   }
+#endif // PRODUCT
 
-  // rax,: receiver klass
+  // rax: receiver klass
   // method (rbx): Method*
   // rcx: receiver
   address ame_addr = __ pc();
   __ jmp( Address(method, Method::from_compiled_offset()));
 
   masm->flush();
+  slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets
+  bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, index_dependent_slop);
 
-  if (PrintMiscellaneous && (WizardMode || Verbose)) {
-    tty->print_cr("vtable #%d at " PTR_FORMAT "[%d] left over: %d",
-                  vtable_index, p2i(s->entry_point()),
-                  (int)(s->code_end() - s->entry_point()),
-                  (int)(s->code_end() - __ pc()));
-  }
-  guarantee(__ pc() <= s->code_end(), "overflowed buffer");
-  // shut the door on sizing bugs
-  int slop = 3;  // 32-bit offset is this much larger than an 8-bit one
-  assert(vtable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset");
-
-  s->set_exception_points(npe_addr, ame_addr);
   return s;
 }
 
 
 VtableStub* VtableStubs::create_itable_stub(int itable_index) {
-  // Note well: pd_code_size_limit is the absolute minimum we can get away with.  If you
-  //            add code here, bump the code stub size returned by pd_code_size_limit!
-  const int i486_code_length = VtableStub::pd_code_size_limit(false);
-  VtableStub* s = new(i486_code_length) VtableStub(false, itable_index);
+  // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
+  const int stub_code_length = code_size_limit(false);
+  VtableStub* s = new(stub_code_length) VtableStub(false, itable_index);
   // Can be NULL if there is no free space in the code cache.
   if (s == NULL) {
     return NULL;
   }
+  // Count unused bytes in instruction sequences of variable size.
+  // We add them to the computed buffer size in order to avoid
+  // overflow in subsequently generated stubs.
+  address   start_pc;
+  int       slop_bytes = 0;
+  int       slop_delta = 0;
+  const int index_dependent_slop = (itable_index == 0) ? 4 :     // index == 0 generates even shorter code.
+                                   (itable_index < 32) ? 3 : 0;  // code size change with transition from 8-bit to 32-bit constant (@index == 32).
 
-  ResourceMark rm;
-  CodeBuffer cb(s->entry_point(), i486_code_length);
+  ResourceMark    rm;
+  CodeBuffer      cb(s->entry_point(), stub_code_length);
   MacroAssembler* masm = new MacroAssembler(&cb);
 
+#if (!defined(PRODUCT) && defined(COMPILER2))
+  if (CountCompiledCalls) {
+    __ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
+  }
+#endif
+
   // Entry arguments:
   //  rax: CompiledICHolder
   //  rcx: Receiver
 
-#ifndef PRODUCT
-  if (CountCompiledCalls) {
-    __ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
-  }
-#endif /* PRODUCT */
-
   // Most registers are in use; we'll use rax, rbx, rsi, rdi
   // (If we need to make rsi, rdi callee-save, do a push/pop here.)
   const Register recv_klass_reg     = rsi;
@@ -171,10 +193,12 @@
   Label L_no_such_interface;
 
   // get receiver klass (also an implicit null-check)
+  assert(VtableStub::receiver_location() == rcx->as_VMReg(), "receiver expected in rcx");
   address npe_addr = __ pc();
-  assert(VtableStub::receiver_location() ==  rcx->as_VMReg(), "receiver expected in  rcx");
   __ load_klass(recv_klass_reg, rcx);
 
+  start_pc = __ pc();
+
   // Receiver subtype check against REFC.
   // Destroys recv_klass_reg value.
   __ lookup_interface_method(// inputs: rec. class, interface
@@ -184,6 +208,9 @@
                              L_no_such_interface,
                              /*return_method=*/false);
 
+  const ptrdiff_t  typecheckSize = __ pc() - start_pc;
+  start_pc = __ pc();
+
   // Get selected method from declaring class and itable index
   const Register method = rbx;
   __ load_klass(recv_klass_reg, rcx); // restore recv_klass_reg
@@ -193,19 +220,30 @@
                              method, temp_reg,
                              L_no_such_interface);
 
+  const ptrdiff_t  lookupSize = __ pc() - start_pc;
+
+  // We expect we need index_dependent_slop extra bytes. Reason:
+  // The emitted code in lookup_interface_method changes when itable_index exceeds 31.
+  // For Windows, a narrow estimate was found to be 104. Other OSes not tested.
+  const ptrdiff_t estimate = 104;
+  const ptrdiff_t codesize = typecheckSize + lookupSize + index_dependent_slop;
+  slop_delta  = (int)(estimate - codesize);
+  slop_bytes += slop_delta;
+  assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize);
+
   // method (rbx): Method*
   // rcx: receiver
 
 #ifdef ASSERT
   if (DebugVtables) {
-      Label L1;
-      __ cmpptr(method, (int32_t)NULL_WORD);
-      __ jcc(Assembler::equal, L1);
-      __ cmpptr(Address(method, Method::from_compiled_offset()), (int32_t)NULL_WORD);
-      __ jcc(Assembler::notZero, L1);
-      __ stop("Method* is null");
-      __ bind(L1);
-    }
+    Label L1;
+    __ cmpptr(method, (int32_t)NULL_WORD);
+    __ jcc(Assembler::equal, L1);
+    __ cmpptr(Address(method, Method::from_compiled_offset()), (int32_t)NULL_WORD);
+    __ jcc(Assembler::notZero, L1);
+    __ stop("Method* is null");
+    __ bind(L1);
+  }
 #endif // ASSERT
 
   address ame_addr = __ pc();
@@ -219,70 +257,15 @@
   // dirty work.
   __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
 
-  __ flush();
+  masm->flush();
+  slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets
+  bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, index_dependent_slop);
 
-  if (PrintMiscellaneous && (WizardMode || Verbose)) {
-    tty->print_cr("itable #%d at " PTR_FORMAT "[%d] left over: %d",
-                  itable_index, p2i(s->entry_point()),
-                  (int)(s->code_end() - s->entry_point()),
-                  (int)(s->code_end() - __ pc()));
-  }
-  guarantee(__ pc() <= s->code_end(), "overflowed buffer");
-  // shut the door on sizing bugs
-  int slop = 3;  // 32-bit offset is this much larger than an 8-bit one
-  assert(itable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset");
-
-  s->set_exception_points(npe_addr, ame_addr);
   return s;
 }
 
-
-
-int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
-  if (is_vtable_stub) {
-    // Vtable stub size
-    return (DebugVtables ? 210 : 16) + (CountCompiledCalls ? 6 : 0);
-  } else {
-    // Itable stub size
-    return (DebugVtables ? 256 : 110) + (CountCompiledCalls ? 6 : 0);
-  }
-  // In order to tune these parameters, run the JVM with VM options
-  // +PrintMiscellaneous and +WizardMode to see information about
-  // actual itable stubs.  Look for lines like this:
-  //   itable #1 at 0x5551212[65] left over: 3
-  // Reduce the constants so that the "left over" number is >=3
-  // for the common cases.
-  // Do not aim at a left-over number of zero, because a
-  // large vtable or itable index (> 16) will require a 32-bit
-  // immediate displacement instead of an 8-bit one.
-  //
-  // The JVM98 app. _202_jess has a megamorphic interface call.
-  // The itable code looks like this:
-  // Decoding VtableStub itbl[1]@1
-  //   mov    0x4(%ecx),%esi
-  //   mov    0xe8(%esi),%edi
-  //   lea    0x130(%esi,%edi,4),%edi
-  //   add    $0x7,%edi
-  //   and    $0xfffffff8,%edi
-  //   lea    0x4(%esi),%esi
-  //   mov    (%edi),%ebx
-  //   cmp    %ebx,%eax
-  //   je     success
-  // loop:
-  //   test   %ebx,%ebx
-  //   je     throw_icce
-  //   add    $0x8,%edi
-  //   mov    (%edi),%ebx
-  //   cmp    %ebx,%eax
-  //   jne    loop
-  // success:
-  //   mov    0x4(%edi),%edi
-  //   mov    (%esi,%edi,1),%ebx
-  //   jmp    *0x44(%ebx)
-  // throw_icce:
-  //   jmp    throw_ICCE_entry
+int VtableStub::pd_code_alignment() {
+  // x86 cache line size is 64 bytes, but we want to limit alignment loss.
+  const unsigned int icache_line_size = wordSize;
+  return icache_line_size;
 }
-
-int VtableStub::pd_code_alignment() {
-  return wordSize;
-}
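
The index_dependent_slop constant above accounts for x86 displacement encoding: itable offsets for small indices fit in 8-bit displacements, while larger indices need the 32-bit form. A minimal standalone sketch of that rule (not part of the patch; the helper name is made up, only the thresholds and byte counts come from the x86_32 code above):

    // Sketch only: models the x86_32 index_dependent_slop rule shown above.
    #include <cassert>

    static int index_dependent_slop_x86_32(int itable_index) {
      if (itable_index == 0) return 4;   // index 0 emits the shortest code, needs the most spare bytes
      if (itable_index < 32) return 3;   // 8-bit displacement still fits; reserve for 32-bit growth
      return 0;                          // 32-bit displacement already emitted, no extra slop
    }

    int main() {
      assert(index_dependent_slop_x86_32(0)  == 4);
      assert(index_dependent_slop_x86_32(7)  == 3);
      assert(index_dependent_slop_x86_32(32) == 0);
      return 0;
    }
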
--- a/src/hotspot/cpu/x86/vtableStubs_x86_64.cpp	Sat Sep 01 12:02:07 2018 -0700
+++ b/src/hotspot/cpu/x86/vtableStubs_x86_64.cpp	Mon Sep 03 09:43:08 2018 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -42,24 +42,32 @@
 #define __ masm->
 
 #ifndef PRODUCT
-extern "C" void bad_compiled_vtable_index(JavaThread* thread,
-                                          oop receiver,
-                                          int index);
+extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index);
 #endif
 
 VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
-  const int amd64_code_length = VtableStub::pd_code_size_limit(true);
-  VtableStub* s = new(amd64_code_length) VtableStub(true, vtable_index);
+  // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
+  const int stub_code_length = code_size_limit(true);
+  VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index);
   // Can be NULL if there is no free space in the code cache.
   if (s == NULL) {
     return NULL;
   }
 
-  ResourceMark rm;
-  CodeBuffer cb(s->entry_point(), amd64_code_length);
+  // Count unused bytes in instruction sequences of variable size.
+  // We add them to the computed buffer size in order to avoid
+  // overflow in subsequently generated stubs.
+  address   start_pc;
+  int       slop_bytes = 0;
+  int       slop_delta = 0;
+  // No variance was detected in vtable stub sizes. Setting index_dependent_slop == 0 will reveal any deviation from this observation.
+  const int index_dependent_slop     = 0;
+
+  ResourceMark    rm;
+  CodeBuffer      cb(s->entry_point(), stub_code_length);
   MacroAssembler* masm = new MacroAssembler(&cb);
 
-#ifndef PRODUCT
+#if (!defined(PRODUCT) && defined(COMPILER2))
   if (CountCompiledCalls) {
     __ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
   }
@@ -77,22 +85,35 @@
 #ifndef PRODUCT
   if (DebugVtables) {
     Label L;
+    start_pc = __ pc();
     // check offset vs vtable length
-    __ cmpl(Address(rax, Klass::vtable_length_offset()),
-            vtable_index * vtableEntry::size());
+    __ cmpl(Address(rax, Klass::vtable_length_offset()), vtable_index*vtableEntry::size());
+    slop_delta  = 12 - (__ pc() - start_pc);  // cmpl varies in length, depending on data
+    slop_bytes += slop_delta;
+    assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
+
     __ jcc(Assembler::greater, L);
     __ movl(rbx, vtable_index);
-    __ call_VM(noreg,
-               CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, rbx);
+    // VTABLE TODO: find upper bound for call_VM length.
+    start_pc = __ pc();
+    __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, rbx);
+    slop_delta  = 480 - (__ pc() - start_pc);
+    slop_bytes += slop_delta;
+    assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
     __ bind(L);
   }
 #endif // PRODUCT
 
-  // load Method* and target address
   const Register method = rbx;
 
+  // load Method* and target address
+  start_pc = __ pc();
   __ lookup_virtual_method(rax, vtable_index, method);
+  slop_delta  = 8 - (int)(__ pc() - start_pc);
+  slop_bytes += slop_delta;
+  assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
 
+#ifndef PRODUCT
   if (DebugVtables) {
     Label L;
     __ cmpptr(method, (int32_t)NULL_WORD);
@@ -102,50 +123,48 @@
     __ stop("Vtable entry is NULL");
     __ bind(L);
   }
+#endif // PRODUCT
+
   // rax: receiver klass
-  // rbx: Method*
+  // method (rbx): Method*
   // rcx: receiver
   address ame_addr = __ pc();
   __ jmp( Address(rbx, Method::from_compiled_offset()));
 
-  __ flush();
+  masm->flush();
+  slop_bytes += index_dependent_slop; // add'l slop for size variance due to large vtable offsets
+  bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, index_dependent_slop);
 
-  if (PrintMiscellaneous && (WizardMode || Verbose)) {
-    tty->print_cr("vtable #%d at " PTR_FORMAT "[%d] left over: %d",
-                  vtable_index, p2i(s->entry_point()),
-                  (int)(s->code_end() - s->entry_point()),
-                  (int)(s->code_end() - __ pc()));
-  }
-  guarantee(__ pc() <= s->code_end(), "overflowed buffer");
-  // shut the door on sizing bugs
-  int slop = 3;  // 32-bit offset is this much larger than an 8-bit one
-  assert(vtable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset");
-
-  s->set_exception_points(npe_addr, ame_addr);
   return s;
 }
 
 
 VtableStub* VtableStubs::create_itable_stub(int itable_index) {
-  // Note well: pd_code_size_limit is the absolute minimum we can get
-  // away with.  If you add code here, bump the code stub size
-  // returned by pd_code_size_limit!
-  const int amd64_code_length = VtableStub::pd_code_size_limit(false);
-  VtableStub* s = new(amd64_code_length) VtableStub(false, itable_index);
+  // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
+  const int stub_code_length = code_size_limit(false);
+  VtableStub* s = new(stub_code_length) VtableStub(false, itable_index);
   // Can be NULL if there is no free space in the code cache.
   if (s == NULL) {
     return NULL;
   }
+  // Count unused bytes in instruction sequences of variable size.
+  // We add them to the computed buffer size in order to avoid
+  // overflow in subsequently generated stubs.
+  address   start_pc;
+  int       slop_bytes = 0;
+  int       slop_delta = 0;
+  const int index_dependent_slop = (itable_index == 0) ? 4 :     // index == 0 generates even shorter code.
+                                   (itable_index < 16) ? 3 : 0;  // code size change with transition from 8-bit to 32-bit constant (@index == 16).
 
-  ResourceMark rm;
-  CodeBuffer cb(s->entry_point(), amd64_code_length);
-  MacroAssembler* masm = new MacroAssembler(&cb);
+  ResourceMark    rm;
+  CodeBuffer      cb(s->entry_point(), stub_code_length);
+  MacroAssembler* masm = new MacroAssembler(&cb);
 
-#ifndef PRODUCT
+#if (!defined(PRODUCT) && defined(COMPILER2))
   if (CountCompiledCalls) {
     __ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
   }
-#endif
+#endif
 
   // Entry arguments:
   //  rax: CompiledICHolder
@@ -158,17 +177,19 @@
   const Register resolved_klass_reg = rbx; // resolved interface klass (REFC)
   const Register temp_reg           = r11;
 
-  Label L_no_such_interface;
-
   const Register icholder_reg = rax;
   __ movptr(resolved_klass_reg, Address(icholder_reg, CompiledICHolder::holder_klass_offset()));
   __ movptr(holder_klass_reg,   Address(icholder_reg, CompiledICHolder::holder_metadata_offset()));
 
+  Label L_no_such_interface;
+
   // get receiver klass (also an implicit null-check)
   assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0");
   address npe_addr = __ pc();
   __ load_klass(recv_klass_reg, j_rarg0);
 
+  start_pc = __ pc();
+
   // Receiver subtype check against REFC.
   // Destroys recv_klass_reg value.
   __ lookup_interface_method(// inputs: rec. class, interface
@@ -178,6 +199,9 @@
                              L_no_such_interface,
                              /*return_method=*/false);
 
+  const ptrdiff_t  typecheckSize = __ pc() - start_pc;
+  start_pc = __ pc();
+
   // Get selected method from declaring class and itable index
   const Register method = rbx;
   __ load_klass(recv_klass_reg, j_rarg0);   // restore recv_klass_reg
@@ -187,6 +211,17 @@
                              method, temp_reg,
                              L_no_such_interface);
 
+  const ptrdiff_t  lookupSize = __ pc() - start_pc;
+
+  // We expect we need index_dependent_slop extra bytes. Reason:
+  // The emitted code in lookup_interface_method changes when itable_index exceeds 15.
+  // For Linux, a very narrow estimate would be 112, but Solaris requires some more space (130).
+  const ptrdiff_t estimate = 136;
+  const ptrdiff_t codesize = typecheckSize + lookupSize + index_dependent_slop;
+  slop_delta  = (int)(estimate - codesize);
+  slop_bytes += slop_delta;
+  assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize);
+
   // If we take a trap while this arg is on the stack we will not
   // be able to walk the stack properly. This is not an issue except
   // when there are mistakes in this assembly code that could generate
@@ -207,8 +242,6 @@
   }
 #endif // ASSERT
 
-  // rbx: Method*
-  // j_rarg0: receiver
   address ame_addr = __ pc();
   __ jmp(Address(method, Method::from_compiled_offset()));
 
@@ -220,68 +253,15 @@
   // dirty work.
   __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
 
-  __ flush();
+  masm->flush();
+  slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets
+  bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, index_dependent_slop);
 
-  if (PrintMiscellaneous && (WizardMode || Verbose)) {
-    tty->print_cr("itable #%d at " PTR_FORMAT "[%d] left over: %d",
-                  itable_index, p2i(s->entry_point()),
-                  (int)(s->code_end() - s->entry_point()),
-                  (int)(s->code_end() - __ pc()));
-  }
-  guarantee(__ pc() <= s->code_end(), "overflowed buffer");
-  // shut the door on sizing bugs
-  int slop = 3;  // 32-bit offset is this much larger than an 8-bit one
-  assert(itable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset");
-
-  s->set_exception_points(npe_addr, ame_addr);
   return s;
 }
 
-int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
-  if (is_vtable_stub) {
-    // Vtable stub size
-    return (DebugVtables ? 512 : 24) + (CountCompiledCalls ? 13 : 0) +
-           (UseCompressedClassPointers ?  MacroAssembler::instr_size_for_decode_klass_not_null() : 0);
-  } else {
-    // Itable stub size
-    return (DebugVtables ? 512 : 140) + (CountCompiledCalls ? 13 : 0) +
-           (UseCompressedClassPointers ? 2 * MacroAssembler::instr_size_for_decode_klass_not_null() : 0);
-  }
-  // In order to tune these parameters, run the JVM with VM options
-  // +PrintMiscellaneous and +WizardMode to see information about
-  // actual itable stubs.  Look for lines like this:
-  //   itable #1 at 0x5551212[71] left over: 3
-  // Reduce the constants so that the "left over" number is >=3
-  // for the common cases.
-  // Do not aim at a left-over number of zero, because a
-  // large vtable or itable index (>= 32) will require a 32-bit
-  // immediate displacement instead of an 8-bit one.
-  //
-  // The JVM98 app. _202_jess has a megamorphic interface call.
-  // The itable code looks like this:
-  // Decoding VtableStub itbl[1]@12
-  //   mov    0x8(%rsi),%r10
-  //   mov    0x198(%r10),%r11d
-  //   lea    0x218(%r10,%r11,8),%r11
-  //   lea    0x8(%r10),%r10
-  //   mov    (%r11),%rbx
-  //   cmp    %rbx,%rax
-  //   je     success
-  // loop:
-  //   test   %rbx,%rbx
-  //   je     throw_icce
-  //   add    $0x10,%r11
-  //   mov    (%r11),%rbx
-  //   cmp    %rbx,%rax
-  //   jne    loop
-  // success:
-  //   mov    0x8(%r11),%r11d
-  //   mov    (%r10,%r11,1),%rbx
-  //   jmpq   *0x60(%rbx)
-  // throw_icce:
-  //   jmpq   throw_ICCE_entry
+int VtableStub::pd_code_alignment() {
+  // x86 cache line size is 64 bytes, but we want to limit alignment loss.
+  const unsigned int icache_line_size = wordSize;
+  return icache_line_size;
 }
-
-int VtableStub::pd_code_alignment() {
-  return wordSize;
-}
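
The fixed estimate checked after the two lookup_interface_method calls works the same way in both x86 files; only the numbers differ (104 on x86_32, 136 on x86_64). A standalone sketch of that check, with a made-up helper name and illustrative byte counts:

    // Sketch only: the measured lookup code plus the expected index-dependent
    // growth must stay within the fixed estimate; the remainder becomes slop.
    #include <cassert>
    #include <cstddef>

    static int leftover_slop(std::ptrdiff_t typecheckSize, std::ptrdiff_t lookupSize,
                             int index_dependent_slop, std::ptrdiff_t estimate) {
      const std::ptrdiff_t codesize = typecheckSize + lookupSize + index_dependent_slop;
      const int slop_delta = (int)(estimate - codesize);
      assert(slop_delta >= 0 && "code size estimate too small");
      return slop_delta;
    }

    int main() {
      // 60 + 64 measured bytes plus 3 bytes of expected growth leave
      // 9 bytes of slop against the x86_64 estimate of 136.
      assert(leftover_slop(60, 64, 3, 136) == 9);
      return 0;
    }
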
--- a/src/hotspot/share/code/vtableStubs.cpp	Sat Sep 01 12:02:07 2018 -0700
+++ b/src/hotspot/share/code/vtableStubs.cpp	Mon Sep 03 09:43:08 2018 +0200
@@ -92,6 +92,32 @@
 
 VtableStub* VtableStubs::_table[VtableStubs::N];
 int VtableStubs::_number_of_vtable_stubs = 0;
+int VtableStubs::_vtab_stub_size = 0;
+int VtableStubs::_itab_stub_size = 0;
+
+#if defined(PRODUCT)
+  // These values are good for the PRODUCT case (no tracing).
+  static const int first_vtableStub_size =  64;
+  static const int first_itableStub_size = 256;
+#else
+  // These values are good for the non-PRODUCT case (when tracing can be switched on).
+  // To find out, run a test workload with
+  //   -Xlog:vtablestubs=Trace -XX:+CountCompiledCalls -XX:+DebugVtables
+  // and use the reported "estimate" value.
+  // Here is a list of observed worst-case values:
+  //               vtable  itable
+  // aarch64:         460     324
+  // arm:               ?       ?
+  // ppc (linux, BE): 404     288
+  // ppc (linux, LE): 356     276
+  // ppc (AIX):       416     296
+  // s390x:           408     256
+  // Solaris-sparc:   792     348
+  // x86 (Linux):     670     309
+  // x86 (MacOS):     682     321
+  static const int first_vtableStub_size = 1024;
+  static const int first_itableStub_size =  512;
+#endif
 
 
 void VtableStubs::initialize() {
@@ -107,6 +133,77 @@
 }
 
 
+int VtableStubs::code_size_limit(bool is_vtable_stub) {
+  if (is_vtable_stub) {
+    return _vtab_stub_size > 0 ? _vtab_stub_size : first_vtableStub_size;
+  } else { // itable stub
+    return _itab_stub_size > 0 ? _itab_stub_size : first_itableStub_size;
+  }
+}   // code_size_limit
+
+
+void VtableStubs::check_and_set_size_limit(bool is_vtable_stub,
+                                           int  code_size,
+                                           int  padding) {
+  const char* name = is_vtable_stub ? "vtable" : "itable";
+
+  guarantee(code_size <= code_size_limit(is_vtable_stub),
+            "buffer overflow in %s stub, code_size is %d, limit is %d", name, code_size, code_size_limit(is_vtable_stub));
+
+  if (is_vtable_stub) {
+    if (log_is_enabled(Trace, vtablestubs)) {
+      if ( (_vtab_stub_size > 0) && ((code_size + padding) > _vtab_stub_size) ) {
+        log_trace(vtablestubs)("%s size estimate needed adjustment from %d to %d bytes",
+                               name, _vtab_stub_size, code_size + padding);
+      }
+    }
+    if ( (code_size + padding) > _vtab_stub_size ) {
+      _vtab_stub_size = code_size + padding;
+    }
+  } else {  // itable stub
+    if (log_is_enabled(Trace, vtablestubs)) {
+      if ( (_itab_stub_size > 0) && ((code_size + padding) > _itab_stub_size) ) {
+        log_trace(vtablestubs)("%s size estimate needed adjustment from %d to %d bytes",
+                               name, _itab_stub_size, code_size + padding);
+      }
+    }
+    if ( (code_size + padding) > _itab_stub_size ) {
+      _itab_stub_size = code_size + padding;
+    }
+  }
+  return;
+}   // check_and_set_size_limit
+
+
+void VtableStubs::bookkeeping(MacroAssembler* masm, outputStream* out, VtableStub* s,
+                              address npe_addr, address ame_addr,   bool is_vtable_stub,
+                              int     index,    int     slop_bytes, int  index_dependent_slop) {
+  const char* name        = is_vtable_stub ? "vtable" : "itable";
+  const int   stub_length = code_size_limit(is_vtable_stub);
+
+  if (log_is_enabled(Trace, vtablestubs)) {
+    log_trace(vtablestubs)("%s #%d at " PTR_FORMAT ": size: %d, estimate: %d, slop area: %d",
+                           name, index, p2i(s->code_begin()),
+                           (int)(masm->pc() - s->code_begin()),
+                           stub_length,
+                           (int)(s->code_end() - masm->pc()));
+  }
+  guarantee(masm->pc() <= s->code_end(), "%s #%d: overflowed buffer, estimated len: %d, actual len: %d, overrun: %d",
+                                         name, index, stub_length,
+                                         (int)(masm->pc() - s->code_begin()),
+                                         (int)(masm->pc() - s->code_end()));
+  assert((masm->pc() + index_dependent_slop) <= s->code_end(), "%s #%d: spare space for 32-bit offset: required = %d, available = %d",
+                                         name, index, index_dependent_slop,
+                                         (int)(s->code_end() - masm->pc()));
+
+  // After the first vtable/itable stub is generated, we have a much
+  // better estimate for the stub size. Remember/update this
+  // estimate after some sanity checks.
+  check_and_set_size_limit(is_vtable_stub, masm->offset(), slop_bytes);
+  s->set_exception_points(npe_addr, ame_addr);
+}
+
+
 address VtableStubs::find_stub(bool is_vtable_stub, int vtable_index) {
   assert(vtable_index >= 0, "must be positive");
 
@@ -173,10 +270,7 @@
   uint hash = VtableStubs::hash(stub->is_vtable_stub(), stub->index());
   VtableStub* s;
   for (s = _table[hash]; s != NULL && s != stub; s = s->next()) {}
-  if (s == stub) {
-    return s;
-  }
-  return NULL;
+  return (s == stub) ? s : NULL;
 }
 
 bool VtableStubs::contains(address pc) {
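
code_size_limit() and check_and_set_size_limit() above implement the adaptive scheme: the first stub is generated into a generously sized buffer, and its measured size plus padding then becomes the limit for all later stubs. A standalone model of the vtable branch (not HotSpot code; only the 64-byte PRODUCT default is taken from the patch):

    #include <algorithm>
    #include <cassert>

    static int vtab_stub_size = 0;                // mirrors VtableStubs::_vtab_stub_size
    static const int first_vtableStub_size = 64;  // PRODUCT default from the patch

    static int code_size_limit() {
      return vtab_stub_size > 0 ? vtab_stub_size : first_vtableStub_size;
    }

    static void check_and_set_size_limit(int code_size, int padding) {
      assert(code_size <= code_size_limit());     // the real code uses guarantee()
      vtab_stub_size = std::max(vtab_stub_size, code_size + padding);
    }

    int main() {
      assert(code_size_limit() == 64);            // first stub: worst-case default
      check_and_set_size_limit(/*code_size=*/28, /*padding=*/4);
      assert(code_size_limit() == 32);            // later stubs: tight, learned limit
      return 0;
    }
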
--- a/src/hotspot/share/code/vtableStubs.hpp	Sat Sep 01 12:02:07 2018 -0700
+++ b/src/hotspot/share/code/vtableStubs.hpp	Mon Sep 03 09:43:08 2018 +0200
@@ -25,12 +25,94 @@
 #ifndef SHARE_VM_CODE_VTABLESTUBS_HPP
 #define SHARE_VM_CODE_VTABLESTUBS_HPP
 
+#include "asm/macroAssembler.hpp"
 #include "code/vmreg.hpp"
 #include "memory/allocation.hpp"
 
 // A VtableStub holds an individual code stub for a pair (vtable index, #args) for either itables or vtables
 // There's a one-to-one relationship between a VtableStub and such a pair.
 
+// A word on VtableStub sizing:
+//   Such a vtable/itable stub consists of the instance data
+//   and an immediately following CodeBuffer.
+//   Unfortunately, the required space for the code buffer varies, depending on
+//   the setting of compile time macros (PRODUCT, ASSERT, ...) and of command line
+//   parameters. Actual data may have an influence on the size as well.
+//
+//   A simple approximation for the VtableStub size would be to just take a value
+//   "large enough" for all circumstances - a worst case estimate.
+//   As there can exist many stubs - and they never go away - we certainly don't
+//   want to waste more code cache space than absolutely necessary.
+//
+//   We need a different approach which, as far as possible, should be independent
+//   from or adaptive to code size variations. These variations may be caused by
+//   changed compile time or run time switches as well as by changed emitter code.
+//
+//   Here is the idea:
+//   For the first stub we generate, we allocate a "large enough" code buffer.
+//   Once all instructions are emitted, we know the actual size of the stub.
+//   Remembering that size allows us to allocate a tightly matching code buffer
+//   for all subsequent stubs. That covers all "static variance", i.e. all variance
+//   that is due to compile time macros, command line parameters, machine capabilities,
+//   and other influences which are immutable for the life span of the vm.
+//
+//   Life isn't always that easy. Code size may depend on actual data, "load constant"
+//   being an example for that. All code segments with such "dynamic variance" require
+//   additional care. We need to know or estimate the worst case code size for each
+//   such segment. With that knowledge, we can maintain a "slop counter" in the
+//   platform-specific stub emitters. It accumulates the difference between worst-case
+//   and actual code size. When the stub is fully generated, the actual stub size is
+//   adjusted (increased) by the slop counter value.
+//
+//   As a result, all code buffers except the first are allocated with the same, tightly matching size.
+//
+
+// VtableStubs creates the code stubs for compiled calls through vtables.
+// There is one stub per (vtable index, args_size) pair, and the stubs are
+// never deallocated. They don't need to be GCed because they contain no oops.
+class VtableStub;
+
+class VtableStubs : AllStatic {
+ public:                                         // N must be public (some compilers need this for _table)
+  enum {
+    N    = 256,                                  // size of stub table; must be power of two
+    mask = N - 1
+  };
+
+ private:
+  friend class VtableStub;
+  static VtableStub* _table[N];                  // table of existing stubs
+  static int         _number_of_vtable_stubs;    // number of stubs created so far (for statistics)
+  static int         _vtab_stub_size;            // current size estimate for vtable stub (quasi-constant)
+  static int         _itab_stub_size;            // current size estimate for itable stub (quasi-constant)
+
+  static VtableStub* create_vtable_stub(int vtable_index);
+  static VtableStub* create_itable_stub(int vtable_index);
+  static VtableStub* lookup            (bool is_vtable_stub, int vtable_index);
+  static void        enter             (bool is_vtable_stub, int vtable_index, VtableStub* s);
+  static inline uint hash              (bool is_vtable_stub, int vtable_index);
+  static address     find_stub         (bool is_vtable_stub, int vtable_index);
+  static void        bookkeeping(MacroAssembler* masm, outputStream* out, VtableStub* s,
+                                 address npe_addr, address ame_addr,   bool is_vtable_stub,
+                                 int     index,    int     slop_bytes, int  index_dependent_slop);
+  static int         code_size_limit(bool is_vtable_stub);
+  static void        check_and_set_size_limit(bool is_vtable_stub,
+                                              int   code_size,
+                                              int   padding);
+
+ public:
+  static address     find_vtable_stub(int vtable_index) { return find_stub(true,  vtable_index); }
+  static address     find_itable_stub(int itable_index) { return find_stub(false, itable_index); }
+
+  static VtableStub* entry_point(address pc);                        // vtable stub entry point for a pc
+  static bool        contains(address pc);                           // is pc within any stub?
+  static VtableStub* stub_containing(address pc);                    // stub containing pc or NULL
+  static int         number_of_vtable_stubs() { return _number_of_vtable_stubs; }
+  static void        initialize();
+  static void        vtable_stub_do(void f(VtableStub*));            // iterates over all vtable stubs
+};
+
+
 class VtableStub {
  private:
   friend class VtableStubs;
@@ -58,7 +140,7 @@
 
  public:
   address code_begin() const                     { return (address)(this + 1); }
-  address code_end() const                       { return code_begin() + pd_code_size_limit(_is_vtable_stub); }
+  address code_end() const                       { return code_begin() + VtableStubs::code_size_limit(_is_vtable_stub); }
   address entry_point() const                    { return code_begin(); }
   static int entry_offset()                      { return sizeof(class VtableStub); }
 
@@ -78,7 +160,6 @@
   }
 
   // platform-dependent routines
-  static int  pd_code_size_limit(bool is_vtable_stub);
   static int  pd_code_alignment();
   // CNC: Removed because vtable stubs are now made with an ideal graph
   // static bool pd_disregard_arg_size();
@@ -100,38 +181,4 @@
 
 };
 
-
-// VtableStubs creates the code stubs for compiled calls through vtables.
-// There is one stub per (vtable index, args_size) pair, and the stubs are
-// never deallocated. They don't need to be GCed because they contain no oops.
-
-class VtableStubs : AllStatic {
- public:                                         // N must be public (some compilers need this for _table)
-  enum {
-    N    = 256,                                  // size of stub table; must be power of two
-    mask = N - 1
-  };
-
- private:
-  static VtableStub* _table[N];                  // table of existing stubs
-  static int         _number_of_vtable_stubs;    // number of stubs created so far (for statistics)
-
-  static VtableStub* create_vtable_stub(int vtable_index);
-  static VtableStub* create_itable_stub(int vtable_index);
-  static VtableStub* lookup            (bool is_vtable_stub, int vtable_index);
-  static void        enter             (bool is_vtable_stub, int vtable_index, VtableStub* s);
-  static inline uint hash              (bool is_vtable_stub, int vtable_index);
-  static address     find_stub         (bool is_vtable_stub, int vtable_index);
-
- public:
-  static address     find_vtable_stub(int vtable_index) { return find_stub(true,  vtable_index); }
-  static address     find_itable_stub(int itable_index) { return find_stub(false, itable_index); }
-  static VtableStub* entry_point(address pc);                        // vtable stub entry point for a pc
-  static bool        contains(address pc);                           // is pc within any stub?
-  static VtableStub* stub_containing(address pc);                    // stub containing pc or NULL
-  static int         number_of_vtable_stubs() { return _number_of_vtable_stubs; }
-  static void        initialize();
-  static void        vtable_stub_do(void f(VtableStub*));            // iterates over all vtable stubs
-};
-
 #endif // SHARE_VM_CODE_VTABLESTUBS_HPP
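
The slop counter described in the sizing comment above can be pictured as a running sum of (worst case minus actual) over all variable-size code segments; that sum is what the stub emitters hand to bookkeeping() on top of the measured stub size. A standalone illustration (not HotSpot code; the segment sizes are invented):

    #include <cassert>
    #include <vector>

    struct Segment { int worst_case; int actual; };   // one variable-size instruction sequence

    static int accumulate_slop(const std::vector<Segment>& segments) {
      int slop_bytes = 0;
      for (const Segment& s : segments) {
        const int slop_delta = s.worst_case - s.actual;  // unused bytes in this segment
        assert(slop_delta >= 0);                         // an estimate must never be short
        slop_bytes += slop_delta;
      }
      return slop_bytes;
    }

    int main() {
      // A 6-byte cmpl estimate that used 4 bytes and a 480-byte call_VM estimate
      // that used 473 bytes carry 2 + 7 = 9 bytes of slop into the size limit.
      assert(accumulate_slop({{6, 4}, {480, 473}}) == 9);
      return 0;
    }
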
--- a/src/hotspot/share/logging/logTag.hpp	Sat Sep 01 12:02:07 2018 -0700
+++ b/src/hotspot/share/logging/logTag.hpp	Mon Sep 03 09:43:08 2018 +0200
@@ -170,7 +170,8 @@
   LOG_TAG(vmoperation) \
   LOG_TAG(vmthread) \
   LOG_TAG(vtables) \
-  LOG_TAG(workgang)
+  LOG_TAG(vtablestubs) \
+  LOG_TAG(workgang) \
   LOG_TAG_LIST_EXT
 
 #define PREFIX_LOG_TAG(T) (LogTag::_##T)