8204240: Extend MDO to allow more reasons to be recorded per bci
author roland
Mon, 11 Jun 2018 15:28:24 +0200
changeset 50577 bf7e2684cd0a
parent 50576 374bd919d8fe
child 50578 e2a7f431f65c
8204240: Extend MDO to allow more reasons to be recorded per bci
Reviewed-by: kvn, neliasso
src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp
src/hotspot/cpu/x86/assembler_x86.cpp
src/hotspot/cpu/x86/assembler_x86.hpp
src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp
src/hotspot/cpu/x86/interp_masm_x86.cpp
src/hotspot/share/jvmci/vmStructs_jvmci.cpp
src/hotspot/share/oops/methodData.hpp
src/hotspot/share/runtime/deoptimization.cpp
src/hotspot/share/runtime/vmStructs.cpp
src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/oops/DataLayout.java
--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp	Fri Jun 15 11:58:34 2018 +0530
+++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp	Mon Jun 11 15:28:24 2018 +0200
@@ -1344,12 +1344,11 @@
       __ mov_metadata(mdo, md->constant_encoding());
       Address data_addr
         = __ form_address(rscratch2, mdo,
-                          md->byte_offset_of_slot(data, DataLayout::DataLayout::header_offset()),
-                          LogBytesPerWord);
-      int header_bits = DataLayout::flag_mask_to_header_mask(BitData::null_seen_byte_constant());
-      __ ldr(rscratch1, data_addr);
-      __ orr(rscratch1, rscratch1, header_bits);
-      __ str(rscratch1, data_addr);
+                          md->byte_offset_of_slot(data, DataLayout::flags_offset()),
+                          0);
+      __ ldrb(rscratch1, data_addr);
+      __ orr(rscratch1, rscratch1, BitData::null_seen_byte_constant());
+      __ strb(rscratch1, data_addr);
       __ b(*obj_is_null);
       __ bind(not_null);
     } else {
@@ -1422,7 +1421,7 @@
     Address counter_addr
       = __ form_address(rscratch2, mdo,
                         md->byte_offset_of_slot(data, CounterData::count_offset()),
-                        LogBytesPerWord);
+                        0);
     __ ldr(rscratch1, counter_addr);
     __ sub(rscratch1, rscratch1, DataLayout::counter_increment);
     __ str(rscratch1, counter_addr);
@@ -1471,12 +1470,11 @@
       __ mov_metadata(mdo, md->constant_encoding());
       Address data_addr
         = __ form_address(rscratch2, mdo,
-                          md->byte_offset_of_slot(data, DataLayout::header_offset()),
-                          LogBytesPerInt);
-      int header_bits = DataLayout::flag_mask_to_header_mask(BitData::null_seen_byte_constant());
-      __ ldrw(rscratch1, data_addr);
-      __ orrw(rscratch1, rscratch1, header_bits);
-      __ strw(rscratch1, data_addr);
+                          md->byte_offset_of_slot(data, DataLayout::flags_offset()),
+                          0);
+      __ ldrb(rscratch1, data_addr);
+      __ orr(rscratch1, rscratch1, BitData::null_seen_byte_constant());
+      __ strb(rscratch1, data_addr);
       __ b(done);
       __ bind(not_null);
     } else {
--- a/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp	Fri Jun 15 11:58:34 2018 +0530
+++ b/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp	Mon Jun 11 15:28:24 2018 +0200
@@ -967,12 +967,11 @@
 void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in,
                                                 int flag_byte_constant) {
   assert(ProfileInterpreter, "must be profiling interpreter");
-  int header_offset = in_bytes(DataLayout::header_offset());
-  int header_bits = DataLayout::flag_mask_to_header_mask(flag_byte_constant);
+  int flags_offset = in_bytes(DataLayout::flags_offset());
   // Set the flag
-  ldr(rscratch1, Address(mdp_in, header_offset));
-  orr(rscratch1, rscratch1, header_bits);
-  str(rscratch1, Address(mdp_in, header_offset));
+  ldrb(rscratch1, Address(mdp_in, flags_offset));
+  orr(rscratch1, rscratch1, flag_byte_constant);
+  strb(rscratch1, Address(mdp_in, flags_offset));
 }
 
 
--- a/src/hotspot/cpu/x86/assembler_x86.cpp	Fri Jun 15 11:58:34 2018 +0530
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp	Mon Jun 11 15:28:24 2018 +0200
@@ -3338,6 +3338,14 @@
   emit_operand(src, dst);
 }
 
+void Assembler::orb(Address dst, int imm8) {
+  InstructionMark im(this);
+  prefix(dst);
+  emit_int8((unsigned char)0x80);
+  emit_operand(rcx, dst, 1);
+  emit_int8(imm8);
+}
+
 void Assembler::packuswb(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
--- a/src/hotspot/cpu/x86/assembler_x86.hpp	Fri Jun 15 11:58:34 2018 +0530
+++ b/src/hotspot/cpu/x86/assembler_x86.hpp	Mon Jun 11 15:28:24 2018 +0200
@@ -1536,6 +1536,8 @@
   void orl(Register dst, Register src);
   void orl(Address dst, Register src);
 
+  void orb(Address dst, int imm8);
+
   void orq(Address dst, int32_t imm32);
   void orq(Register dst, int32_t imm32);
   void orq(Register dst, Address src);
--- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp	Fri Jun 15 11:58:34 2018 +0530
+++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp	Mon Jun 11 15:28:24 2018 +0200
@@ -1682,9 +1682,9 @@
     // Object is null; update MDO and exit
     Register mdo  = klass_RInfo;
     __ mov_metadata(mdo, md->constant_encoding());
-    Address data_addr(mdo, md->byte_offset_of_slot(data, DataLayout::header_offset()));
-    int header_bits = DataLayout::flag_mask_to_header_mask(BitData::null_seen_byte_constant());
-    __ orl(data_addr, header_bits);
+    Address data_addr(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset()));
+    int header_bits = BitData::null_seen_byte_constant();
+    __ orb(data_addr, header_bits);
     __ jmp(*obj_is_null);
     __ bind(not_null);
   } else {
@@ -1828,9 +1828,9 @@
       // Object is null; update MDO and exit
       Register mdo  = klass_RInfo;
       __ mov_metadata(mdo, md->constant_encoding());
-      Address data_addr(mdo, md->byte_offset_of_slot(data, DataLayout::header_offset()));
-      int header_bits = DataLayout::flag_mask_to_header_mask(BitData::null_seen_byte_constant());
-      __ orl(data_addr, header_bits);
+      Address data_addr(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset()));
+      int header_bits = BitData::null_seen_byte_constant();
+      __ orb(data_addr, header_bits);
       __ jmp(done);
       __ bind(not_null);
     } else {
--- a/src/hotspot/cpu/x86/interp_masm_x86.cpp	Fri Jun 15 11:58:34 2018 +0530
+++ b/src/hotspot/cpu/x86/interp_masm_x86.cpp	Mon Jun 11 15:28:24 2018 +0200
@@ -1432,10 +1432,10 @@
 void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in,
                                                 int flag_byte_constant) {
   assert(ProfileInterpreter, "must be profiling interpreter");
-  int header_offset = in_bytes(DataLayout::header_offset());
-  int header_bits = DataLayout::flag_mask_to_header_mask(flag_byte_constant);
+  int header_offset = in_bytes(DataLayout::flags_offset());
+  int header_bits = flag_byte_constant;
   // Set the flag
-  orl(Address(mdp_in, header_offset), header_bits);
+  orb(Address(mdp_in, header_offset), header_bits);
 }
 
 
--- a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp	Fri Jun 15 11:58:34 2018 +0530
+++ b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp	Mon Jun 11 15:28:24 2018 +0200
@@ -136,6 +136,7 @@
   nonstatic_field(DataLayout,                  _header._struct._tag,                   u1)                                           \
   nonstatic_field(DataLayout,                  _header._struct._flags,                 u1)                                           \
   nonstatic_field(DataLayout,                  _header._struct._bci,                   u2)                                           \
+  nonstatic_field(DataLayout,                  _header._struct._traps,                 u4)                                           \
   nonstatic_field(DataLayout,                  _cells[0],                              intptr_t)                                     \
                                                                                                                                      \
   nonstatic_field(Deoptimization::UnrollBlock, _size_of_deoptimized_frame,             int)                                          \
--- a/src/hotspot/share/oops/methodData.hpp	Fri Jun 15 11:58:34 2018 +0530
+++ b/src/hotspot/share/oops/methodData.hpp	Mon Jun 11 15:28:24 2018 +0200
@@ -83,16 +83,17 @@
 private:
   // Every data layout begins with a header.  This header
   // contains a tag, which is used to indicate the size/layout
-  // of the data, 4 bits of flags, which can be used in any way,
-  // 4 bits of trap history (none/one reason/many reasons),
+  // of the data, 8 bits of flags, which can be used in any way,
+  // 32 bits of trap history (none/one reason/many reasons),
   // and a bci, which is used to tie this piece of data to a
   // specific bci in the bytecodes.
   union {
-    intptr_t _bits;
+    u8 _bits;
     struct {
       u1 _tag;
       u1 _flags;
       u2 _bci;
+      u4 _traps;
     } _struct;
   } _header;
 
@@ -131,28 +132,23 @@
   };
 
   enum {
-    // The _struct._flags word is formatted as [trap_state:4 | flags:4].
-    // The trap state breaks down further as [recompile:1 | reason:3].
+    // The trap state breaks down as [recompile:1 | reason:31].
     // This further breakdown is defined in deoptimization.cpp.
     // See Deoptimization::trap_state_reason for an assert that
     // trap_bits is big enough to hold reasons < Reason_RECORDED_LIMIT.
     //
     // The trap_state is collected only if ProfileTraps is true.
-    trap_bits = 1+3,  // 3: enough to distinguish [0..Reason_RECORDED_LIMIT].
-    trap_shift = BitsPerByte - trap_bits,
+    trap_bits = 1+31,  // 31: enough to distinguish [0..Reason_RECORDED_LIMIT].
     trap_mask = right_n_bits(trap_bits),
-    trap_mask_in_place = (trap_mask << trap_shift),
-    flag_limit = trap_shift,
-    flag_mask = right_n_bits(flag_limit),
     first_flag = 0
   };
 
   // Size computation
   static int header_size_in_bytes() {
-    return cell_size;
+    return header_size_in_cells() * cell_size;
   }
   static int header_size_in_cells() {
-    return 1;
+    return LP64_ONLY(1) NOT_LP64(2);
   }
 
   static int compute_size_in_bytes(int cell_count) {
@@ -167,7 +163,7 @@
     return _header._struct._tag;
   }
 
-  // Return a few bits of trap state.  Range is [0..trap_mask].
+  // Return 32 bits of trap state.
   // The state tells if traps with zero, one, or many reasons have occurred.
   // It also tells whether zero or many recompilations have occurred.
   // The associated trap histogram in the MDO itself tells whether
@@ -175,14 +171,14 @@
   // occurred, and the MDO shows N occurrences of X, we make the
   // simplifying assumption that all N occurrences can be blamed
   // on that BCI.
-  int trap_state() const {
-    return ((_header._struct._flags >> trap_shift) & trap_mask);
+  uint trap_state() const {
+    return _header._struct._traps;
   }
 
-  void set_trap_state(int new_state) {
+  void set_trap_state(uint new_state) {
     assert(ProfileTraps, "used only under +ProfileTraps");
-    uint old_flags = (_header._struct._flags & flag_mask);
-    _header._struct._flags = (new_state << trap_shift) | old_flags;
+    uint old_flags = _header._struct._traps;
+    _header._struct._traps = new_state | old_flags;
   }
 
   u1 flags() const {
@@ -193,10 +189,10 @@
     return _header._struct._bci;
   }
 
-  void set_header(intptr_t value) {
+  void set_header(u8 value) {
     _header._bits = value;
   }
-  intptr_t header() {
+  u8 header() {
     return _header._bits;
   }
   void set_cell_at(int index, intptr_t value) {
@@ -207,12 +203,10 @@
     return _cells[index];
   }
 
-  void set_flag_at(int flag_number) {
-    assert(flag_number < flag_limit, "oob");
+  void set_flag_at(u1 flag_number) {
     _header._struct._flags |= (0x1 << flag_number);
   }
-  bool flag_at(int flag_number) const {
-    assert(flag_number < flag_limit, "oob");
+  bool flag_at(u1 flag_number) const {
     return (_header._struct._flags & (0x1 << flag_number)) != 0;
   }
 
@@ -233,14 +227,13 @@
     return byte_offset_of(DataLayout, _cells) + in_ByteSize(index * cell_size);
   }
   // Return a value which, when or-ed as a byte into _flags, sets the flag.
-  static int flag_number_to_byte_constant(int flag_number) {
-    assert(0 <= flag_number && flag_number < flag_limit, "oob");
+  static u1 flag_number_to_constant(u1 flag_number) {
     DataLayout temp; temp.set_header(0);
     temp.set_flag_at(flag_number);
     return temp._header._struct._flags;
   }
   // Return a value which, when or-ed as a word into _header, sets the flag.
-  static intptr_t flag_mask_to_header_mask(int byte_constant) {
+  static u8 flag_mask_to_header_mask(uint byte_constant) {
     DataLayout temp; temp.set_header(0);
     temp._header._struct._flags = byte_constant;
     return temp._header._bits;
@@ -359,8 +352,8 @@
   static ByteSize cell_offset(int index) {
     return DataLayout::cell_offset(index);
   }
-  static int flag_number_to_byte_constant(int flag_number) {
-    return DataLayout::flag_number_to_byte_constant(flag_number);
+  static int flag_number_to_constant(int flag_number) {
+    return DataLayout::flag_number_to_constant(flag_number);
   }
 
   ProfileData(DataLayout* data) {
@@ -534,7 +527,7 @@
 
   // Code generation support
   static int null_seen_byte_constant() {
-    return flag_number_to_byte_constant(null_seen_flag);
+    return flag_number_to_constant(null_seen_flag);
   }
 
   static ByteSize bit_data_size() {
@@ -1862,6 +1855,17 @@
 protected:
   enum {
     speculative_trap_method,
+#ifndef _LP64
+    // The size of the area for traps is a multiple of the header
+    // size, 2 cells on 32 bits. Packed at the end of this area are
+    // argument info entries (with tag
+    // DataLayout::arg_info_data_tag). The logic in
+    // MethodData::bci_to_extra_data() that guarantees traps don't
+    // overflow over argument info entries assumes the size of a
+    // SpeculativeTrapData is twice the header size. On 32 bits, a
+    // SpeculativeTrapData must be 4 cells.
+    padding,
+#endif
     speculative_trap_cell_count
   };
 public:
--- a/src/hotspot/share/runtime/deoptimization.cpp	Fri Jun 15 11:58:34 2018 +0530
+++ b/src/hotspot/share/runtime/deoptimization.cpp	Mon Jun 11 15:28:24 2018 +0200
@@ -2071,7 +2071,7 @@
 
 // Local derived constants.
 // Further breakdown of DataLayout::trap_state, as promised by DataLayout.
-const int DS_REASON_MASK   = DataLayout::trap_mask >> 1;
+const int DS_REASON_MASK   = ((uint)DataLayout::trap_mask) >> 1;
 const int DS_RECOMPILE_BIT = DataLayout::trap_mask - DS_REASON_MASK;
 
 //---------------------------trap_state_reason---------------------------------
--- a/src/hotspot/share/runtime/vmStructs.cpp	Fri Jun 15 11:58:34 2018 +0530
+++ b/src/hotspot/share/runtime/vmStructs.cpp	Mon Jun 11 15:28:24 2018 +0200
@@ -291,6 +291,7 @@
   nonstatic_field(DataLayout,                  _header._struct._tag,                          u1)                                    \
   nonstatic_field(DataLayout,                  _header._struct._flags,                        u1)                                    \
   nonstatic_field(DataLayout,                  _header._struct._bci,                          u2)                                    \
+  nonstatic_field(DataLayout,                  _header._struct._traps,                        u4)                                    \
   nonstatic_field(DataLayout,                  _cells[0],                                     intptr_t)                              \
   nonstatic_field(MethodCounters,              _nmethod_age,                                  int)                                   \
   nonstatic_field(MethodCounters,              _interpreter_invocation_limit,                 int)                                   \
--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/oops/DataLayout.java	Fri Jun 15 11:58:34 2018 +0530
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/oops/DataLayout.java	Mon Jun 11 15:28:24 2018 +0200
@@ -47,19 +47,14 @@
   public static final int parametersTypeDataTag = 12;
   public static final int speculativeTrapDataTag = 13;
 
-  // The _struct._flags word is formatted as [trapState:4 | flags:4].
-  // The trap state breaks down further as [recompile:1 | reason:3].
+  // The trap state breaks down as [recompile:1 | reason:31].
   // This further breakdown is defined in deoptimization.cpp.
   // See Deoptimization.trapStateReason for an assert that
   // trapBits is big enough to hold reasons < reasonRecordedLimit.
   //
   // The trapState is collected only if ProfileTraps is true.
-  public static final int trapBits = 1+3;  // 3: enough to distinguish [0..reasonRecordedLimit].
-  public static final int trapShift = 8 - trapBits;
+  public static final int trapBits = 1+31;  // 31: enough to distinguish [0..reasonRecordedLimit].
   public static final int trapMask = Bits.rightNBits(trapBits);
-  public static final int trapMaskInPlace = (trapMask << trapShift);
-  public static final int flagLimit = trapShift;
-  public static final int flagMask = Bits.rightNBits(flagLimit);
   public static final int firstFlag = 0;
 
   private Address data;
@@ -97,16 +92,17 @@
 
   // Every data layout begins with a header.  This header
   // contains a tag, which is used to indicate the size/layout
-  // of the data, 4 bits of flags, which can be used in any way,
-  // 4 bits of trap history (none/one reason/many reasons),
+  // of the data, 8 bits of flags, which can be used in any way,
+  // 32 bits of trap history (none/one reason/many reasons),
   // and a bci, which is used to tie this piece of data to a
   // specific bci in the bytecodes.
   // union {
-  //   intptrT _bits;
+  //   u8 _bits;
   //   struct {
   //     u1 _tag;
   //     u1 _flags;
   //     u2 _bci;
+  //     u4 _traps;
   //   } _struct;
   // } _header;
 
@@ -119,10 +115,10 @@
 
   // Size computation
   static int headerSizeInBytes() {
-    return MethodData.cellSize;
+    return MethodData.cellSize * headerSizeInCells();
   }
   static int headerSizeInCells() {
-    return 1;
+      return VM.getVM().isLP64() ? 1 : 2;
   }
 
   static public int computeSizeInBytes(int cellCount) {
@@ -146,7 +142,7 @@
   // simplifying assumption that all N occurrences can be blamed
   // on that BCI.
   int trapState() {
-    return (flags() >> trapShift) & trapMask;
+    return data.getJIntAt(offset+4);
   }
 
   int flags() {