8203394: Implementation of JEP 331: Low-Overhead Heap Profiling
author jcbeyler
Fri, 15 Jun 2018 00:49:54 -0700
changeset 50578 e2a7f431f65c
parent 50577 bf7e2684cd0a
child 50579 1596f418ffe4
8203394: Implementation of JEP 331: Low-Overhead Heap Profiling
Summary: Implement Low-Overhead Heap Profiling
Reviewed-by: eosterlund, gthornbr, rehn, sspitsyn, tschatzl
make/nb_native/nbproject/configurations.xml
src/hotspot/share/gc/shared/collectedHeap.cpp
src/hotspot/share/gc/shared/collectedHeap.hpp
src/hotspot/share/gc/shared/collectedHeap.inline.hpp
src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp
src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp
src/hotspot/share/prims/jvmti.xml
src/hotspot/share/prims/jvmtiEnv.cpp
src/hotspot/share/prims/jvmtiEventController.cpp
src/hotspot/share/prims/jvmtiExport.cpp
src/hotspot/share/prims/jvmtiExport.hpp
src/hotspot/share/prims/jvmtiManageCapabilities.cpp
src/hotspot/share/prims/jvmtiThreadState.cpp
src/hotspot/share/prims/jvmtiThreadState.hpp
src/hotspot/share/runtime/mutexLocker.cpp
src/hotspot/share/runtime/mutexLocker.hpp
src/hotspot/share/runtime/thread.hpp
--- a/make/nb_native/nbproject/configurations.xml	Mon Jun 11 15:28:24 2018 +0200
+++ b/make/nb_native/nbproject/configurations.xml	Fri Jun 15 00:49:54 2018 -0700
@@ -6153,6 +6153,9 @@
                 <df name="IsModifiableModule">
                   <in>libIsModifiableModuleTest.c</in>
                 </df>
+                <df name="HeapMonitorModule">
+                  <in>libHeapMonitorTest.c</in>
+                </df>
                 <df name="ModuleAwareAgents">
                   <df name="ClassFileLoadHook">
                     <in>libMAAClassFileLoadHook.c</in>
@@ -40154,6 +40157,11 @@
             tool="0"
             flavor2="0">
       </item>
+      <item path="../../test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/libHeapMonitorTest.c"
+            ex="false"
+            tool="0"
+            flavor2="0">
+      </item>
       <item path="../../test/hotspot/jtreg/serviceability/jvmti/ModuleAwareAgents/ClassFileLoadHook/libMAAClassFileLoadHook.c"
             ex="false"
             tool="0"
--- a/src/hotspot/share/gc/shared/collectedHeap.cpp	Mon Jun 11 15:28:24 2018 +0200
+++ b/src/hotspot/share/gc/shared/collectedHeap.cpp	Fri Jun 15 00:49:54 2018 -0700
@@ -378,6 +378,27 @@
 }
 
 HeapWord* CollectedHeap::allocate_from_tlab_slow(Klass* klass, size_t size, TRAPS) {
+  HeapWord* obj = NULL;
+
+  // In assertion mode, check that a sampling collector was present somewhere
+  // up the stack. This enforces that no allocation path reaches this point
+  // without a sampling collector.
+  // Only check if the sampler could actually sample something in this call path.
+  assert(!JvmtiExport::should_post_sampled_object_alloc()
+         || !JvmtiSampledObjectAllocEventCollector::object_alloc_is_safe_to_sample()
+         || THREAD->heap_sampler().sampling_collector_present(),
+         "Sampling collector not present.");
+
+  if (ThreadHeapSampler::enabled()) {
+    // Try to allocate the sampled object from the TLAB: it is possible that a
+    // sample point was set even though the TLAB still has space.
+    obj = THREAD->tlab().allocate_sampled_object(size);
+
+    if (obj != NULL) {
+      return obj;
+    }
+  }
+
   ThreadLocalAllocBuffer& tlab = THREAD->tlab();
 
   // Retain tlab and allocate object in shared space if
@@ -401,7 +422,7 @@
   // between minimal and new_tlab_size is accepted.
   size_t actual_tlab_size = 0;
   size_t min_tlab_size = ThreadLocalAllocBuffer::compute_min_size(size);
-  HeapWord* obj = Universe::heap()->allocate_new_tlab(min_tlab_size, new_tlab_size, &actual_tlab_size);
+  obj = Universe::heap()->allocate_new_tlab(min_tlab_size, new_tlab_size, &actual_tlab_size);
   if (obj == NULL) {
     assert(actual_tlab_size == 0, "Allocation failed, but actual size was updated. min: " SIZE_FORMAT ", desired: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
            min_tlab_size, new_tlab_size, actual_tlab_size);
@@ -425,6 +446,14 @@
     Copy::fill_to_words(obj + hdr_size, actual_tlab_size - hdr_size, badHeapWordVal);
 #endif // ASSERT
   }
+
+  // Send the thread information about this allocation in case a sample is
+  // requested.
+  if (ThreadHeapSampler::enabled()) {
+    size_t tlab_bytes_since_last_sample = THREAD->tlab().bytes_since_last_sample_point();
+    THREAD->heap_sampler().check_for_sampling(obj, size, tlab_bytes_since_last_sample);
+  }
+
   tlab.fill(obj, obj + size, actual_tlab_size);
   return obj;
 }
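
All allocation paths above funnel their sampling decision through ThreadHeapSampler::check_for_sampling(). The sampler's implementation is not part of the hunks shown here, but the jvmti.xml text below describes its contract: on average one allocation is sampled per sampling-rate bytes, with geometric (exponential) jitter so fixed allocation patterns are still sampled fairly. A minimal stand-alone C++ sketch of that contract — the class name, seed, and rate-0 handling are illustrative, not HotSpot code:

#include <cstddef>
#include <random>

// Sketch of a per-thread byte-counting sampler. Each interval is drawn
// from an exponential distribution with the requested mean, so sampling
// stays unbiased even when allocation sizes follow a fixed pattern.
class ToyHeapSampler {
 public:
  explicit ToyHeapSampler(size_t mean_rate)
      : _mean_rate(mean_rate), _rng(0x8203394) { pick_next_interval(); }

  // Returns true when an allocation of `size` bytes crosses the current
  // sample point; the caller would then record/post the sampled object.
  bool check_for_sampling(size_t size) {
    if (size < _bytes_until_sample) {
      _bytes_until_sample -= size;
      return false;
    }
    pick_next_interval();
    return true;
  }

 private:
  void pick_next_interval() {
    if (_mean_rate == 0) {  // per the spec: rate 0 samples every allocation
      _bytes_until_sample = 1;
      return;
    }
    std::exponential_distribution<double> dist(1.0 / double(_mean_rate));
    _bytes_until_sample = static_cast<size_t>(dist(_rng)) + 1;
  }

  size_t _mean_rate;
  size_t _bytes_until_sample;
  std::mt19937 _rng;
};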
--- a/src/hotspot/share/gc/shared/collectedHeap.hpp	Mon Jun 11 15:28:24 2018 +0200
+++ b/src/hotspot/share/gc/shared/collectedHeap.hpp	Fri Jun 15 00:49:54 2018 -0700
@@ -194,6 +194,18 @@
 
   virtual void trace_heap(GCWhen::Type when, const GCTracer* tracer);
 
+  // Internal allocation methods.
+  inline static HeapWord* common_allocate_memory(Klass* klass, int size,
+                                                 void (*post_setup)(Klass*, HeapWord*, int),
+                                                 int size_for_post, bool init_memory,
+                                                 TRAPS);
+
+  // Internal allocation method for common obj/class/array allocations.
+  inline static HeapWord* allocate_memory(Klass* klass, int size,
+                                          void (*post_setup)(Klass*, HeapWord*, int),
+                                          int size_for_post, bool init_memory,
+                                          TRAPS);
+
   // Verification functions
   virtual void check_for_bad_heap_word_value(HeapWord* addr, size_t size)
     PRODUCT_RETURN;
--- a/src/hotspot/share/gc/shared/collectedHeap.inline.hpp	Mon Jun 11 15:28:24 2018 +0200
+++ b/src/hotspot/share/gc/shared/collectedHeap.inline.hpp	Fri Jun 15 00:49:54 2018 -0700
@@ -34,6 +34,7 @@
 #include "oops/oop.inline.hpp"
 #include "prims/jvmtiExport.hpp"
 #include "runtime/sharedRuntime.hpp"
+#include "runtime/handles.inline.hpp"
 #include "runtime/thread.inline.hpp"
 #include "services/lowMemoryDetector.hpp"
 #include "utilities/align.hpp"
@@ -200,9 +201,15 @@
   NOT_PRODUCT(Universe::heap()->check_for_non_bad_heap_word_value(result, size));
   assert(!HAS_PENDING_EXCEPTION,
          "Unexpected exception, will result in uninitialized storage");
-  THREAD->incr_allocated_bytes(size * HeapWordSize);
+  size_t size_in_bytes = size * HeapWordSize;
+  THREAD->incr_allocated_bytes(size_in_bytes);
+
+  AllocTracer::send_allocation_outside_tlab(klass, result, size_in_bytes, THREAD);
 
-  AllocTracer::send_allocation_outside_tlab(klass, result, size * HeapWordSize, THREAD);
+  if (ThreadHeapSampler::enabled()) {
+    THREAD->heap_sampler().check_for_sampling(result, size_in_bytes);
+  }
+
   return result;
 }
 
@@ -214,12 +221,58 @@
   Copy::fill_to_aligned_words(obj + hs, size - hs);
 }
 
+HeapWord* CollectedHeap::common_allocate_memory(Klass* klass, int size,
+                                                void (*post_setup)(Klass*, HeapWord*, int),
+                                                int size_for_post, bool init_memory,
+                                                TRAPS) {
+  HeapWord* obj;
+  if (init_memory) {
+    obj = common_mem_allocate_init(klass, size, CHECK_NULL);
+  } else {
+    obj = common_mem_allocate_noinit(klass, size, CHECK_NULL);
+  }
+  post_setup(klass, obj, size_for_post);
+  return obj;
+}
+
+HeapWord* CollectedHeap::allocate_memory(Klass* klass, int size,
+                                         void (*post_setup)(Klass*, HeapWord*, int),
+                                         int size_for_post, bool init_memory,
+                                         TRAPS) {
+  HeapWord* obj;
+
+  assert(JavaThread::current()->heap_sampler().add_sampling_collector(),
+         "Should never return false.");
+
+  if (JvmtiExport::should_post_sampled_object_alloc()) {
+    HandleMark hm(THREAD);
+    Handle obj_h;
+    {
+      JvmtiSampledObjectAllocEventCollector collector;
+      obj = common_allocate_memory(klass, size, post_setup, size_for_post,
+                                   init_memory, CHECK_NULL);
+      // If we want to be sampling, protect the allocated object with a Handle
+      // before doing the callback. The callback is done in the destructor of
+      // the JvmtiSampledObjectAllocEventCollector.
+      obj_h = Handle(THREAD, (oop) obj);
+    }
+    obj = (HeapWord*) obj_h();
+  } else {
+    obj = common_allocate_memory(klass, size, post_setup, size_for_post,
+                                 init_memory, CHECK_NULL);
+  }
+
+  assert(JavaThread::current()->heap_sampler().remove_sampling_collector(),
+         "Should never return false.");
+  return obj;
+}
+
 oop CollectedHeap::obj_allocate(Klass* klass, int size, TRAPS) {
   debug_only(check_for_valid_allocation_state());
   assert(!Universe::heap()->is_gc_active(), "Allocation during gc not allowed");
   assert(size >= 0, "int won't convert to size_t");
-  HeapWord* obj = common_mem_allocate_init(klass, size, CHECK_NULL);
-  post_allocation_setup_obj(klass, obj, size);
+  HeapWord* obj = allocate_memory(klass, size, post_allocation_setup_obj,
+                                  size, true, CHECK_NULL);
   NOT_PRODUCT(Universe::heap()->check_for_bad_heap_word_value(obj, size));
   return (oop)obj;
 }
@@ -228,8 +281,8 @@
   debug_only(check_for_valid_allocation_state());
   assert(!Universe::heap()->is_gc_active(), "Allocation during gc not allowed");
   assert(size >= 0, "int won't convert to size_t");
-  HeapWord* obj = common_mem_allocate_init(klass, size, CHECK_NULL);
-  post_allocation_setup_class(klass, obj, size); // set oop_size
+  HeapWord* obj = allocate_memory(klass, size, post_allocation_setup_class,
+                                  size, true, CHECK_NULL);
   NOT_PRODUCT(Universe::heap()->check_for_bad_heap_word_value(obj, size));
   return (oop)obj;
 }
@@ -241,8 +294,8 @@
   debug_only(check_for_valid_allocation_state());
   assert(!Universe::heap()->is_gc_active(), "Allocation during gc not allowed");
   assert(size >= 0, "int won't convert to size_t");
-  HeapWord* obj = common_mem_allocate_init(klass, size, CHECK_NULL);
-  post_allocation_setup_array(klass, obj, length);
+  HeapWord* obj = allocate_memory(klass, size, post_allocation_setup_array,
+                                  length, true, CHECK_NULL);
   NOT_PRODUCT(Universe::heap()->check_for_bad_heap_word_value(obj, size));
   return (oop)obj;
 }
@@ -254,9 +307,9 @@
   debug_only(check_for_valid_allocation_state());
   assert(!Universe::heap()->is_gc_active(), "Allocation during gc not allowed");
   assert(size >= 0, "int won't convert to size_t");
-  HeapWord* obj = common_mem_allocate_noinit(klass, size, CHECK_NULL);
-  ((oop)obj)->set_klass_gap(0);
-  post_allocation_setup_array(klass, obj, length);
+
+  HeapWord* obj = allocate_memory(klass, size, post_allocation_setup_array,
+                                  length, false, CHECK_NULL);
 #ifndef PRODUCT
   const size_t hs = oopDesc::header_size()+1;
   Universe::heap()->check_for_non_bad_heap_word_value(obj+hs, size-hs);
--- a/src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp	Mon Jun 11 15:28:24 2018 +0200
+++ b/src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp	Fri Jun 15 00:49:54 2018 -0700
@@ -45,6 +45,14 @@
   make_parsable(true);   // also retire the TLAB
 }
 
+size_t ThreadLocalAllocBuffer::remaining() {
+  if (end() == NULL) {
+    return 0;
+  }
+
+  return pointer_delta(hard_end(), top());
+}
+
 void ThreadLocalAllocBuffer::accumulate_statistics_before_gc() {
   global_stats()->initialize();
 
@@ -121,10 +129,12 @@
       set_top(NULL);
       set_pf_top(NULL);
       set_end(NULL);
+      set_allocation_end(NULL);
     }
   }
   assert(!(retire || ZeroTLAB)  ||
-         (start() == NULL && end() == NULL && top() == NULL),
+         (start() == NULL && end() == NULL && top() == NULL &&
+          _allocation_end == NULL),
          "TLAB must be reset");
 }
 
@@ -172,8 +182,13 @@
   _allocated_size += new_size;
   print_stats("fill");
   assert(top <= start + new_size - alignment_reserve(), "size too small");
+
   initialize(start, top, start + new_size - alignment_reserve());
 
+  if (ThreadHeapSampler::enabled()) {
+    set_sample_end();
+  }
+
   // Reset amount of internal fragmentation
   set_refill_waste_limit(initial_refill_waste_limit());
 }
@@ -185,6 +200,7 @@
   set_top(top);
   set_pf_top(top);
   set_end(end);
+  set_allocation_end(end);
   invariants();
 }
 
@@ -306,12 +322,45 @@
   guarantee(p == top(), "end of last object must match end of space");
 }
 
+void ThreadLocalAllocBuffer::set_sample_end() {
+  size_t heap_words_remaining = pointer_delta(_end, _top);
+  size_t bytes_until_sample = myThread()->heap_sampler().bytes_until_sample();
+  size_t words_until_sample = bytes_until_sample / HeapWordSize;
+
+  if (heap_words_remaining > words_until_sample) {
+    HeapWord* new_end = _top + words_until_sample;
+    set_end(new_end);
+    _bytes_since_last_sample_point = bytes_until_sample;
+  } else {
+    _bytes_since_last_sample_point = heap_words_remaining * HeapWordSize;
+  }
+}
+
 Thread* ThreadLocalAllocBuffer::myThread() {
   return (Thread*)(((char *)this) +
                    in_bytes(start_offset()) -
                    in_bytes(Thread::tlab_start_offset()));
 }
 
+void ThreadLocalAllocBuffer::set_back_allocation_end() {
+  _end = _allocation_end;
+}
+
+HeapWord* ThreadLocalAllocBuffer::allocate_sampled_object(size_t size) {
+  set_back_allocation_end();
+  HeapWord* result = allocate(size);
+
+  if (result) {
+    myThread()->heap_sampler().check_for_sampling(result, size * HeapWordSize, _bytes_since_last_sample_point);
+    set_sample_end();
+  }
+
+  return result;
+}
+
+HeapWord* ThreadLocalAllocBuffer::hard_end() {
+  return _allocation_end + alignment_reserve();
+}
 
 GlobalTLABStats::GlobalTLABStats() :
   _allocating_threads_avg(TLABAllocationWeight) {
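
The arithmetic in set_sample_end() is easiest to follow with concrete numbers. A stand-alone rendering with toy values (assuming an 8-byte HeapWord, as on a 64-bit VM; the variable names mirror the hunk above):

#include <cstddef>
#include <cstdio>

int main() {
  const size_t HeapWordSize = 8;            // bytes per word (64-bit VM)
  size_t heap_words_remaining = 4096;       // pointer_delta(_end, _top)
  size_t bytes_until_sample   = 2048;       // from the thread's sampler
  size_t words_until_sample   = bytes_until_sample / HeapWordSize;  // 256

  size_t bytes_since_last_sample_point;
  if (heap_words_remaining > words_until_sample) {
    // _end is pulled back to _top + 256 words; the first fast-path
    // allocation past that point trips allocate_from_tlab_slow().
    bytes_since_last_sample_point = bytes_until_sample;             // 2048
  } else {
    // The sample point lies beyond this TLAB; account only for what fits.
    bytes_since_last_sample_point = heap_words_remaining * HeapWordSize;
  }
  std::printf("words_until_sample=%zu, bytes_since_last_sample_point=%zu\n",
              words_until_sample, bytes_since_last_sample_point);
  return 0;
}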
--- a/src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp	Mon Jun 11 15:28:24 2018 +0200
+++ b/src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp	Fri Jun 15 00:49:54 2018 -0700
@@ -37,6 +37,12 @@
//            It is thread-private at any time, but may be multiplexed over
 //            time across multiple threads. The park()/unpark() pair is
 //            used to make it available for such multiplexing.
+//
+//            Heap sampling is performed via the end and allocation_end
+//            fields.
+//            allocation_end contains the real end of the tlab allocation,
+//            whereas end can be set to an arbitrary spot in the tlab to
+//            trip the slow path and sample the allocation.
 class ThreadLocalAllocBuffer: public CHeapObj<mtThread> {
   friend class VMStructs;
   friend class JVMCIVMStructs;
@@ -44,10 +50,13 @@
   HeapWord* _start;                              // address of TLAB
   HeapWord* _top;                                // address after last allocation
   HeapWord* _pf_top;                             // allocation prefetch watermark
-  HeapWord* _end;                                // allocation end (excluding alignment_reserve)
+  HeapWord* _end;                                // allocation end (can be the sampling end point or _allocation_end)
+  HeapWord* _allocation_end;                     // end for allocations (actual TLAB end, excluding alignment_reserve)
+
   size_t    _desired_size;                       // desired size   (including alignment_reserve)
   size_t    _refill_waste_limit;                 // hold onto tlab if free() is larger than this
   size_t    _allocated_before_last_gc;           // total bytes allocated up until the last gc
+  size_t    _bytes_since_last_sample_point;      // bytes since last sample point.
 
   static size_t   _max_size;                          // maximum size of any TLAB
   static int      _reserve_for_allocation_prefetch;   // Reserve at the end of the TLAB
@@ -67,6 +76,7 @@
 
   void set_start(HeapWord* start)                { _start = start; }
   void set_end(HeapWord* end)                    { _end = end; }
+  void set_allocation_end(HeapWord* ptr)         { _allocation_end = ptr; }
   void set_top(HeapWord* top)                    { _top = top; }
   void set_pf_top(HeapWord* pf_top)              { _pf_top = pf_top; }
   void set_desired_size(size_t desired_size)     { _desired_size = desired_size; }
@@ -77,7 +87,7 @@
   static int    target_refills()                 { return _target_refills; }
   size_t initial_desired_size();
 
-  size_t remaining() const                       { return end() == NULL ? 0 : pointer_delta(hard_end(), top()); }
+  size_t remaining();
 
   bool is_last_allocation(HeapWord* obj, size_t size) { return pointer_delta(top(), obj) == size; }
 
@@ -118,8 +128,8 @@
 
   HeapWord* start() const                        { return _start; }
   HeapWord* end() const                          { return _end; }
-  HeapWord* hard_end() const                     { return _end + alignment_reserve(); }
   HeapWord* top() const                          { return _top; }
+  HeapWord* hard_end();
   HeapWord* pf_top() const                       { return _pf_top; }
   size_t desired_size() const                    { return _desired_size; }
   size_t used() const                            { return pointer_delta(top(), start()); }
@@ -127,9 +137,11 @@
   size_t free() const                            { return pointer_delta(end(), top()); }
   // Don't discard tlab if remaining space is larger than this.
   size_t refill_waste_limit() const              { return _refill_waste_limit; }
+  size_t bytes_since_last_sample_point() const   { return _bytes_since_last_sample_point; }
 
   // Allocate size HeapWords. The memory is NOT initialized to zero.
   inline HeapWord* allocate(size_t size);
+  HeapWord* allocate_sampled_object(size_t size);
 
   // Undo last allocation.
   inline bool undo_allocate(HeapWord* obj, size_t size);
@@ -171,6 +183,9 @@
   void fill(HeapWord* start, HeapWord* top, size_t new_size);
   void initialize();
 
+  void set_back_allocation_end();
+  void set_sample_end();
+
   static size_t refill_waste_limit_increment()   { return TLABWasteIncrement; }
 
   template <typename T> void addresses_do(T f) {
@@ -178,6 +193,7 @@
     f(&_top);
     f(&_pf_top);
     f(&_end);
+    f(&_allocation_end);
   }
 
   // Code generation support
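
The new class comment is the crux of the design: the inlined allocation fast path already compares against _end, so pulling _end back to the sample point makes a sampled allocation fall into the existing slow path at no extra fast-path cost. A simplified char*-based model of that interplay (a sketch, not the real word-based allocate() in threadLocalAllocBuffer.inline.hpp):

#include <cstddef>
#include <cstdio>

// Simplified TLAB fast path: sampling reuses the existing _end limit
// test, so a sampled TLAB adds no additional branch on the hot path.
struct ToyTlab {
  char* _top;
  char* _end;             // may be pulled back to the sample point...
  char* _allocation_end;  // ...while this holds the real TLAB end

  void* allocate(size_t size) {
    if (_top + size <= _end) {  // the one pre-existing limit test
      char* obj = _top;
      _top += size;
      return obj;
    }
    return nullptr;             // trips the slow path, which samples and
                                // then restores or re-sets _end
  }
};

int main() {
  static char buf[1024];
  ToyTlab tlab = { buf, buf + 256, buf + 1024 };  // sample point at 256 bytes
  size_t served = 0;
  while (tlab.allocate(64) != nullptr) {
    served++;
  }
  std::printf("fast-path allocations before the sample point: %zu\n", served);
  return 0;  // prints 4: the fifth allocation would cross _end
}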
--- a/src/hotspot/share/prims/jvmti.xml	Mon Jun 11 15:28:24 2018 +0200
+++ b/src/hotspot/share/prims/jvmti.xml	Fri Jun 15 00:49:54 2018 -0700
@@ -10353,6 +10353,14 @@
           See <eventlink id="ClassFileLoadHook"/>.
         </description>
       </capabilityfield>
+      <capabilityfield id="can_generate_sampled_object_alloc_events" since="11">
+        <description>
+          Can generate sampled allocation events.
+          If this capability is enabled then the heap sampling method
+          <functionlink id="SetHeapSamplingRate"></functionlink> can be
+          called and <eventlink id="SampledObjectAlloc"></eventlink> events can be generated.
+        </description>
+      </capabilityfield>
     </capabilitiestypedef>
 
     <function id="GetPotentialCapabilities" jkernel="yes" phase="onload" num="140">
@@ -11531,6 +11539,47 @@
 
   </category>
 
+  <category id="heap_monitoring" label="Heap Monitoring">
+    <function id="SetHeapSamplingRate" phase="onload" num="156" since="11">
+      <synopsis>Set Heap Sampling Rate</synopsis>
+      <description>
+        Generate a <eventlink id="SampledObjectAlloc"/> event when objects are allocated.
+        Each thread keeps a counter of bytes allocated. The event will only be generated
+        when that counter exceeds an average of <paramlink id="sampling_rate"></paramlink>
+        since the last sample.
+        <p/>
+        Setting <paramlink id="sampling_rate"></paramlink> to 0 will cause an event to be
+        generated by each allocation supported by the system.
+      </description>
+      <origin>new</origin>
+      <capabilities>
+        <required id="can_generate_sampled_object_alloc_events"></required>
+      </capabilities>
+      <parameters>
+        <param id="sampling_rate">
+          <jint/>
+          <description>
+            The sampling rate in bytes. The sampler uses a statistical approach to
+            generate an event, on average, once for every <paramlink id="sampling_rate"/> bytes of
+            memory allocated by a given thread.
+            <p/>
+            Passing 0 as a sampling rate generates a sample for every allocation.
+            <p/>
+            Note: The overhead of this feature is tied to the sampling rate.
+            A small rate, such as 1024 bytes, samples frequently and will incur a high overhead.
+            A larger rate, such as 1024KB, will have a much lower overhead. Sampling should only
+            be used with an understanding that it may impact performance.
+          </description>
+        </param>
+      </parameters>
+      <errors>
+        <error id="JVMTI_ERROR_ILLEGAL_ARGUMENT">
+          <paramlink id="sampling_rate"></paramlink> is less than zero.
+        </error>
+      </errors>
+    </function>
+  </category>
+
 </functionsection>
 
 <errorsection label="Error Reference">
@@ -13495,13 +13544,13 @@
       <param id="object">
 	<jobject/>
 	  <description>
-	    JNI local reference to the object that was allocated
+	    JNI local reference to the object that was allocated.
 	  </description>
       </param>
       <param id="object_klass">
 	<jclass/>
 	  <description>
-	    JNI local reference to the class of the object
+	    JNI local reference to the class of the object.
 	  </description>
       </param>
       <param id="size">
@@ -13513,8 +13562,75 @@
     </parameters>
   </event>
 
+  <event label="Sampled Object Allocation"
+    id="SampledObjectAlloc" const="JVMTI_EVENT_SAMPLED_OBJECT_ALLOC" num="86" since="11">
+    <description>
+      Sent when an allocated object is sampled.
+      By default, the sampling rate is a geometric variable with a 512KB mean.
+      Each thread tracks how many bytes it has allocated since it sent the last event.
+      When the number of bytes exceeds the sampling rate, it will send another event.
+      This implies that, on average, one object will be sampled every time a thread
+      has allocated 512KB since the last sample.
+      <p/>
+      Note that this is a geometric variable: it will not sample every 512KB precisely.
+      The goal is to ensure high quality sampling even if allocation is
+      happening in a fixed pattern (i.e., the same set of objects are being allocated
+      every 512KB).
+      <p/>
+      If another sampling rate is required, the user can call
+      <functionlink id="SetHeapSamplingRate"></functionlink> with a strictly positive integer value, representing
+      the new sampling rate.
+      <p/>
+      This event is sent once the sampled allocation has been performed.  It provides the object, stack trace
+      of the allocation, the thread allocating, the size of allocation, and the object's class.
+      <p/>
+      A typical use case of this system is to determine where heap allocations originate.
+      In conjunction with weak references and the function
+      <functionlink id="GetStackTrace"></functionlink>, a user can track which objects were allocated from which
+      stack trace, and which are still live during the execution of the program.
+    </description>
+    <origin>new</origin>
+    <capabilities>
+      <required id="can_generate_sampled_object_alloc_events"></required>
+    </capabilities>
+    <parameters>
+      <param id="jni_env">
+        <outptr>
+          <struct>JNIEnv</struct>
+        </outptr>
+        <description>
+          The JNI environment of the event (current) thread.
+        </description>
+      </param>
+      <param id="thread">
+        <jthread/>
+        <description>
+          Thread allocating the object.
+        </description>
+      </param>
+      <param id="object">
+        <jobject/>
+        <description>
+          JNI local reference to the object that was allocated.
+        </description>
+      </param>
+      <param id="object_klass">
+        <jclass/>
+        <description>
+          JNI local reference to the class of the object.
+        </description>
+      </param>
+      <param id="size">
+        <jlong/>
+        <description>
+          Size of the object (in bytes). See <functionlink id="GetObjectSize"/>.
+        </description>
+      </param>
+    </parameters>
+  </event>
+
   <event label="Object Free"
-	 id="ObjectFree" const="JVMTI_EVENT_OBJECT_FREE" num="83">
+        id="ObjectFree" const="JVMTI_EVENT_OBJECT_FREE" num="83">
     <description>
       An Object Free event is sent when the garbage collector frees an object.
       Events are only sent for tagged objects--see
@@ -13534,7 +13650,7 @@
 	<jlong/>
 	<description>
 	  The freed object's tag
-	</description>
+	</description>        
       </param>
     </parameters>
   </event>
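
Taken together, the capability, SetHeapSamplingRate, and SampledObjectAlloc additions above are everything an agent needs. A minimal agent exercising them might look like the following sketch (compiled as C++ against the JDK 11 jvmti.h and loaded with -agentpath; the counter and the omitted error checking are illustrative):

#include <jvmti.h>
#include <atomic>
#include <cstdio>

static std::atomic<long> g_samples(0);

// Matches the SampledObjectAlloc signature specified above. A real
// profiler would call GetStackTrace(thread, ...) here and could tag
// `object` to track liveness, per the spec's suggested use case.
static void JNICALL SampledObjectAlloc(jvmtiEnv* jvmti, JNIEnv* jni,
                                       jthread thread, jobject object,
                                       jclass object_klass, jlong size) {
  g_samples.fetch_add(1, std::memory_order_relaxed);
}

extern "C" JNIEXPORT jint JNICALL
Agent_OnLoad(JavaVM* vm, char* options, void* reserved) {
  jvmtiEnv* jvmti = NULL;
  if (vm->GetEnv(reinterpret_cast<void**>(&jvmti), JVMTI_VERSION_11) != JNI_OK) {
    return JNI_ERR;
  }

  jvmtiCapabilities caps = {};
  caps.can_generate_sampled_object_alloc_events = 1;
  jvmti->AddCapabilities(&caps);                 // error checking elided

  jvmtiEventCallbacks callbacks = {};
  callbacks.SampledObjectAlloc = &SampledObjectAlloc;
  jvmti->SetEventCallbacks(&callbacks, sizeof(callbacks));

  jvmti->SetHeapSamplingRate(512 * 1024);        // the documented default mean
  jvmti->SetEventNotificationMode(JVMTI_ENABLE, JVMTI_EVENT_SAMPLED_OBJECT_ALLOC,
                                  NULL /* all threads */);
  return JNI_OK;
}

extern "C" JNIEXPORT void JNICALL Agent_OnUnload(JavaVM* vm) {
  std::printf("SampledObjectAlloc events received: %ld\n", g_samples.load());
}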
--- a/src/hotspot/share/prims/jvmtiEnv.cpp	Mon Jun 11 15:28:24 2018 +0200
+++ b/src/hotspot/share/prims/jvmtiEnv.cpp	Fri Jun 15 00:49:54 2018 -0700
@@ -64,6 +64,7 @@
 #include "runtime/reflectionUtils.hpp"
 #include "runtime/signature.hpp"
 #include "runtime/thread.inline.hpp"
+#include "runtime/threadHeapSampler.hpp"
 #include "runtime/threadSMR.hpp"
 #include "runtime/timerTrace.hpp"
 #include "runtime/vframe.inline.hpp"
@@ -537,10 +538,17 @@
     if (event_type == JVMTI_EVENT_CLASS_FILE_LOAD_HOOK && enabled) {
       record_class_file_load_hook_enabled();
     }
+
+    if (event_type == JVMTI_EVENT_SAMPLED_OBJECT_ALLOC) {
+      if (enabled) {
+        ThreadHeapSampler::enable();
+      } else {
+        ThreadHeapSampler::disable();
+      }
+    }
     JvmtiEventController::set_user_enabled(this, (JavaThread*) NULL, event_type, enabled);
   } else {
     // We have a specified event_thread.
-
     JavaThread* java_thread = NULL;
     ThreadsListHandle tlh;
     jvmtiError err = JvmtiExport::cv_external_thread_to_JavaThread(tlh.list(), event_thread, &java_thread, NULL);
@@ -3631,6 +3639,15 @@
   return JVMTI_ERROR_NONE;
 } /* end GetAvailableProcessors */
 
+jvmtiError
+JvmtiEnv::SetHeapSamplingRate(jint sampling_rate) {
+  if (sampling_rate < 0) {
+    return JVMTI_ERROR_ILLEGAL_ARGUMENT;
+  }
+  ThreadHeapSampler::set_sampling_rate(sampling_rate);
+  return JVMTI_ERROR_NONE;
+} /* end SetHeapSamplingRate */
+
   //
   // System Properties functions
   //
--- a/src/hotspot/share/prims/jvmtiEventController.cpp	Mon Jun 11 15:28:24 2018 +0200
+++ b/src/hotspot/share/prims/jvmtiEventController.cpp	Fri Jun 15 00:49:54 2018 -0700
@@ -84,6 +84,7 @@
 static const jlong  OBJECT_FREE_BIT = (((jlong)1) << (JVMTI_EVENT_OBJECT_FREE - TOTAL_MIN_EVENT_TYPE_VAL));
 static const jlong  RESOURCE_EXHAUSTED_BIT = (((jlong)1) << (JVMTI_EVENT_RESOURCE_EXHAUSTED - TOTAL_MIN_EVENT_TYPE_VAL));
 static const jlong  VM_OBJECT_ALLOC_BIT = (((jlong)1) << (JVMTI_EVENT_VM_OBJECT_ALLOC - TOTAL_MIN_EVENT_TYPE_VAL));
+static const jlong  SAMPLED_OBJECT_ALLOC_BIT = (((jlong)1) << (JVMTI_EVENT_SAMPLED_OBJECT_ALLOC - TOTAL_MIN_EVENT_TYPE_VAL));
 
 // bits for extension events
 static const jlong  CLASS_UNLOAD_BIT = (((jlong)1) << (EXT_EVENT_CLASS_UNLOAD - TOTAL_MIN_EVENT_TYPE_VAL));
@@ -620,6 +621,7 @@
     JvmtiExport::set_should_post_compiled_method_load((any_env_thread_enabled & COMPILED_METHOD_LOAD_BIT) != 0);
     JvmtiExport::set_should_post_compiled_method_unload((any_env_thread_enabled & COMPILED_METHOD_UNLOAD_BIT) != 0);
     JvmtiExport::set_should_post_vm_object_alloc((any_env_thread_enabled & VM_OBJECT_ALLOC_BIT) != 0);
+    JvmtiExport::set_should_post_sampled_object_alloc((any_env_thread_enabled & SAMPLED_OBJECT_ALLOC_BIT) != 0);
 
     // need this if we want thread events or we need them to init data
     JvmtiExport::set_should_post_thread_life((any_env_thread_enabled & NEED_THREAD_LIFE_EVENTS) != 0);
--- a/src/hotspot/share/prims/jvmtiExport.cpp	Mon Jun 11 15:28:24 2018 +0200
+++ b/src/hotspot/share/prims/jvmtiExport.cpp	Fri Jun 15 00:49:54 2018 -0700
@@ -1028,12 +1028,12 @@
   return k;
 }
 
-class JvmtiVMObjectAllocEventMark : public JvmtiClassEventMark  {
+class JvmtiObjectAllocEventMark : public JvmtiClassEventMark  {
  private:
    jobject _jobj;
    jlong    _size;
  public:
-   JvmtiVMObjectAllocEventMark(JavaThread *thread, oop obj) : JvmtiClassEventMark(thread, oop_to_klass(obj)) {
+   JvmtiObjectAllocEventMark(JavaThread *thread, oop obj) : JvmtiClassEventMark(thread, oop_to_klass(obj)) {
      _jobj = (jobject)to_jobject(obj);
      _size = obj->size() * wordSize;
    };
@@ -1198,6 +1198,7 @@
 bool              JvmtiExport::_should_post_object_free                   = false;
 bool              JvmtiExport::_should_post_resource_exhausted            = false;
 bool              JvmtiExport::_should_post_vm_object_alloc               = false;
+bool              JvmtiExport::_should_post_sampled_object_alloc          = false;
 bool              JvmtiExport::_should_post_on_exceptions                 = false;
 
 ////////////////////////////////////////////////////////////////////////////////////////////////
@@ -2280,7 +2281,7 @@
     // Can not take safepoint here so can not use state_for to get
     // jvmti thread state.
     JvmtiThreadState *state = ((JavaThread*)thread)->jvmti_thread_state();
-    if (state != NULL ) {
+    if (state != NULL) {
       // state is non NULL when VMObjectAllocEventCollector is enabled.
       JvmtiVMObjectAllocEventCollector *collector;
       collector = state->get_vm_object_alloc_event_collector();
@@ -2295,6 +2296,27 @@
   }
 }
 
+// Collect all the sampled allocated objects.
+void JvmtiExport::record_sampled_internal_object_allocation(oop obj) {
+  Thread* thread = Thread::current_or_null();
+  if (thread != NULL && thread->is_Java_thread()) {
+    // Can not take safepoint here.
+    NoSafepointVerifier no_sfpt;
+    // Can not take safepoint here so can not use state_for to get
+    // jvmti thread state.
+    JvmtiThreadState *state = ((JavaThread*)thread)->jvmti_thread_state();
+    if (state != NULL) {
+      // state is non NULL when SampledObjectAllocEventCollector is enabled.
+      JvmtiSampledObjectAllocEventCollector *collector;
+      collector = state->get_sampled_object_alloc_event_collector();
+
+      if (collector != NULL && collector->is_enabled()) {
+        collector->record_allocation(obj);
+      }
+    }
+  }
+}
+
 void JvmtiExport::post_garbage_collection_finish() {
   Thread *thread = Thread::current(); // this event is posted from VM-Thread.
   EVT_TRIG_TRACE(JVMTI_EVENT_GARBAGE_COLLECTION_FINISH,
@@ -2484,8 +2506,7 @@
   }
 }
 
-
-void JvmtiExport::post_vm_object_alloc(JavaThread *thread,  oop object) {
+void JvmtiExport::post_vm_object_alloc(JavaThread *thread, oop object) {
   EVT_TRIG_TRACE(JVMTI_EVENT_VM_OBJECT_ALLOC, ("[%s] Trg vm object alloc triggered",
                       JvmtiTrace::safe_get_thread_name(thread)));
   if (object == NULL) {
@@ -2500,7 +2521,7 @@
                                          JvmtiTrace::safe_get_thread_name(thread),
                                          object==NULL? "NULL" : object->klass()->external_name()));
 
-      JvmtiVMObjectAllocEventMark jem(thread, h());
+      JvmtiObjectAllocEventMark jem(thread, h());
       JvmtiJavaThreadEventTransition jet(thread);
       jvmtiEventVMObjectAlloc callback = env->callbacks()->VMObjectAlloc;
       if (callback != NULL) {
@@ -2511,6 +2532,34 @@
   }
 }
 
+void JvmtiExport::post_sampled_object_alloc(JavaThread *thread, oop object) {
+  EVT_TRIG_TRACE(JVMTI_EVENT_SAMPLED_OBJECT_ALLOC,
+                 ("[%s] Trg sampled object alloc triggered",
+                  JvmtiTrace::safe_get_thread_name(thread)));
+  if (object == NULL) {
+    return;
+  }
+  HandleMark hm(thread);
+  Handle h(thread, object);
+  JvmtiEnvIterator it;
+  for (JvmtiEnv* env = it.first(); env != NULL; env = it.next(env)) {
+    if (env->is_enabled(JVMTI_EVENT_SAMPLED_OBJECT_ALLOC)) {
+      EVT_TRACE(JVMTI_EVENT_SAMPLED_OBJECT_ALLOC,
+                ("[%s] Evt sampled object alloc sent %s",
+                 JvmtiTrace::safe_get_thread_name(thread),
+                 object == NULL ? "NULL" : object->klass()->external_name()));
+
+      JvmtiObjectAllocEventMark jem(thread, h());
+      JvmtiJavaThreadEventTransition jet(thread);
+      jvmtiEventSampledObjectAlloc callback = env->callbacks()->SampledObjectAlloc;
+      if (callback != NULL) {
+        (*callback)(env->jvmti_external(), jem.jni_env(), jem.jni_thread(),
+                    jem.jni_jobject(), jem.jni_class(), jem.size());
+      }
+    }
+  }
+}
+
 ////////////////////////////////////////////////////////////////////////////////////////////////
 
 void JvmtiExport::cleanup_thread(JavaThread* thread) {
@@ -2536,7 +2585,7 @@
 
 void JvmtiExport::oops_do(OopClosure* f) {
   JvmtiCurrentBreakpoints::oops_do(f);
-  JvmtiVMObjectAllocEventCollector::oops_do_for_all_threads(f);
+  JvmtiObjectAllocEventCollector::oops_do_for_all_threads(f);
 }
 
 void JvmtiExport::weak_oops_do(BoolObjectClosure* is_alive, OopClosure* f) {
@@ -2669,12 +2718,28 @@
   } else if (is_dynamic_code_event()) {
     _prev = state->get_dynamic_code_event_collector();
     state->set_dynamic_code_event_collector((JvmtiDynamicCodeEventCollector *)this);
+  } else if (is_sampled_object_alloc_event()) {
+    JvmtiSampledObjectAllocEventCollector *prev = state->get_sampled_object_alloc_event_collector();
+
+    if (prev) {
+      // JvmtiSampledObjectAllocEventCollector wants only one active collector
+      // enabled. This allows a collector to detect user code requiring
+      // a sample in the callback.
+      return;
+    }
+    state->set_sampled_object_alloc_event_collector((JvmtiSampledObjectAllocEventCollector*) this);
   }
+
+  _unset_jvmti_thread_state = true;
 }
 
 // Unset current event collection in this thread and reset it with previous
 // collector.
 void JvmtiEventCollector::unset_jvmti_thread_state() {
+  if (!_unset_jvmti_thread_state) {
+    return;
+  }
+
   JvmtiThreadState* state = JavaThread::current()->jvmti_thread_state();
   if (state != NULL) {
     // restore the previous event collector (if any)
@@ -2685,14 +2750,19 @@
         // this thread's jvmti state was created during the scope of
         // the event collector.
       }
-    } else {
-      if (is_dynamic_code_event()) {
-        if (state->get_dynamic_code_event_collector() == this) {
-          state->set_dynamic_code_event_collector((JvmtiDynamicCodeEventCollector *)_prev);
-        } else {
-          // this thread's jvmti state was created during the scope of
-          // the event collector.
-        }
+    } else if (is_dynamic_code_event()) {
+      if (state->get_dynamic_code_event_collector() == this) {
+        state->set_dynamic_code_event_collector((JvmtiDynamicCodeEventCollector *)_prev);
+      } else {
+        // this thread's jvmti state was created during the scope of
+        // the event collector.
+      }
+    } else if (is_sampled_object_alloc_event()) {
+      if (state->get_sampled_object_alloc_event_collector() == this) {
+        state->set_sampled_object_alloc_event_collector((JvmtiSampledObjectAllocEventCollector*)_prev);
+      } else {
+        // this thread's jvmti state was created during the scope of
+        // the event collector.
       }
     }
   }
@@ -2730,31 +2800,25 @@
 }
 
 // Setup current thread to record vm allocated objects.
-JvmtiVMObjectAllocEventCollector::JvmtiVMObjectAllocEventCollector() : _allocated(NULL) {
-  if (JvmtiExport::should_post_vm_object_alloc()) {
-    _enable = true;
-    setup_jvmti_thread_state();
-  } else {
-    _enable = false;
-  }
+JvmtiObjectAllocEventCollector::JvmtiObjectAllocEventCollector() :
+    _allocated(NULL), _enable(false), _post_callback(NULL) {
 }
 
 // Post vm_object_alloc event for vm allocated objects visible to java
 // world.
-JvmtiVMObjectAllocEventCollector::~JvmtiVMObjectAllocEventCollector() {
-  if (_allocated != NULL) {
+void JvmtiObjectAllocEventCollector::generate_call_for_allocated() {
+  if (_allocated) {
     set_enabled(false);
     for (int i = 0; i < _allocated->length(); i++) {
       oop obj = _allocated->at(i);
-      JvmtiExport::post_vm_object_alloc(JavaThread::current(), obj);
+      _post_callback(JavaThread::current(), obj);
     }
-    delete _allocated;
+    delete _allocated;  _allocated = NULL;
   }
-  unset_jvmti_thread_state();
 }
 
-void JvmtiVMObjectAllocEventCollector::record_allocation(oop obj) {
-  assert(is_enabled(), "VM object alloc event collector is not enabled");
+void JvmtiObjectAllocEventCollector::record_allocation(oop obj) {
+  assert(is_enabled(), "Object alloc event collector is not enabled");
   if (_allocated == NULL) {
     _allocated = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<oop>(1, true);
   }
@@ -2762,9 +2826,9 @@
 }
 
 // GC support.
-void JvmtiVMObjectAllocEventCollector::oops_do(OopClosure* f) {
-  if (_allocated != NULL) {
-    for(int i=_allocated->length() - 1; i >= 0; i--) {
+void JvmtiObjectAllocEventCollector::oops_do(OopClosure* f) {
+  if (_allocated) {
+    for (int i = _allocated->length() - 1; i >= 0; i--) {
       if (_allocated->at(i) != NULL) {
         f->do_oop(_allocated->adr_at(i));
       }
@@ -2772,7 +2836,7 @@
   }
 }
 
-void JvmtiVMObjectAllocEventCollector::oops_do_for_all_threads(OopClosure* f) {
+void JvmtiObjectAllocEventCollector::oops_do_for_all_threads(OopClosure* f) {
   // no-op if jvmti not enabled
   if (!JvmtiEnv::environments_might_exist()) {
     return;
@@ -2781,11 +2845,17 @@
   for (JavaThreadIteratorWithHandle jtiwh; JavaThread *jthr = jtiwh.next(); ) {
     JvmtiThreadState *state = jthr->jvmti_thread_state();
     if (state != NULL) {
-      JvmtiVMObjectAllocEventCollector *collector;
+      JvmtiObjectAllocEventCollector *collector;
       collector = state->get_vm_object_alloc_event_collector();
       while (collector != NULL) {
         collector->oops_do(f);
-        collector = (JvmtiVMObjectAllocEventCollector *)collector->get_prev();
+        collector = (JvmtiObjectAllocEventCollector*) collector->get_prev();
+      }
+
+      collector = state->get_sampled_object_alloc_event_collector();
+      while (collector != NULL) {
+        collector->oops_do(f);
+        collector = (JvmtiObjectAllocEventCollector*) collector->get_prev();
       }
     }
   }
@@ -2820,6 +2890,63 @@
   }
 };
 
+// Setup current thread to record vm allocated objects.
+JvmtiVMObjectAllocEventCollector::JvmtiVMObjectAllocEventCollector() {
+  if (JvmtiExport::should_post_vm_object_alloc()) {
+    _enable = true;
+    setup_jvmti_thread_state();
+    _post_callback = JvmtiExport::post_vm_object_alloc;
+  }
+}
+
+JvmtiVMObjectAllocEventCollector::~JvmtiVMObjectAllocEventCollector() {
+  if (_enable) {
+    generate_call_for_allocated();
+  }
+  unset_jvmti_thread_state();
+}
+
+bool JvmtiSampledObjectAllocEventCollector::object_alloc_is_safe_to_sample() {
+  Thread* thread = Thread::current();
+  // Really only sample allocations if this is a JavaThread and not the compiler
+  // thread.
+  if (!thread->is_Java_thread() || thread->is_Compiler_thread()) {
+    return false;
+  }
+
+  if (Compile_lock->owner() == thread ||
+      MultiArray_lock->owner() == thread) {
+    return false;
+  }
+  return true;
+}
+
+// Setup current thread to record sampled allocated objects.
+JvmtiSampledObjectAllocEventCollector::JvmtiSampledObjectAllocEventCollector() {
+  if (JvmtiExport::should_post_sampled_object_alloc()) {
+    if (!object_alloc_is_safe_to_sample()) {
+      return;
+    }
+
+    _enable = true;
+    setup_jvmti_thread_state();
+    _post_callback = JvmtiExport::post_sampled_object_alloc;
+  }
+}
+
+JvmtiSampledObjectAllocEventCollector::~JvmtiSampledObjectAllocEventCollector() {
+  if (!_enable) {
+    return;
+  }
+
+  generate_call_for_allocated();
+  unset_jvmti_thread_state();
+
+  // Sampled object collectors should only ever be present on Java threads.
+  assert(Thread::current()->is_Java_thread(),
+         "Should always be in a Java thread");
+}
+
 JvmtiGCMarker::JvmtiGCMarker() {
   // if there aren't any JVMTI environments then nothing to do
   if (!JvmtiEnv::environments_might_exist()) {
--- a/src/hotspot/share/prims/jvmtiExport.hpp	Mon Jun 11 15:28:24 2018 +0200
+++ b/src/hotspot/share/prims/jvmtiExport.hpp	Fri Jun 15 00:49:54 2018 -0700
@@ -123,6 +123,7 @@
   // breakpoint info
   JVMTI_SUPPORT_FLAG(should_clean_up_heap_objects)
   JVMTI_SUPPORT_FLAG(should_post_vm_object_alloc)
+  JVMTI_SUPPORT_FLAG(should_post_sampled_object_alloc)
 
   // If flag cannot be implemented, give an error if on=true
   static void report_unsupported(bool on);
@@ -363,6 +364,18 @@
       record_vm_internal_object_allocation(object);
     }
   }
+
+  static void record_sampled_internal_object_allocation(oop object) NOT_JVMTI_RETURN;
+  // Post objects collected by sampled_object_alloc_event_collector.
+  static void post_sampled_object_alloc(JavaThread *thread, oop object) NOT_JVMTI_RETURN;
+
+  // Collects vm internal objects for later event posting.
+  inline static void sampled_object_alloc_event_collector(oop object) {
+    if (should_post_sampled_object_alloc()) {
+      record_sampled_internal_object_allocation(object);
+    }
+  }
+
   inline static void post_array_size_exhausted() {
     if (should_post_resource_exhausted()) {
       post_resource_exhausted(JVMTI_RESOURCE_EXHAUSTED_OOM_ERROR,
@@ -422,12 +435,16 @@
 class JvmtiEventCollector : public StackObj {
  private:
   JvmtiEventCollector* _prev;  // Save previous one to support nested event collector.
+  bool _unset_jvmti_thread_state;
 
  public:
-  void setup_jvmti_thread_state(); // Set this collector in current thread.
+  JvmtiEventCollector() : _prev(NULL), _unset_jvmti_thread_state(false) {}
+
+  void setup_jvmti_thread_state(); // Set this collector in the current thread.
   void unset_jvmti_thread_state(); // Reset previous collector in current thread.
   virtual bool is_dynamic_code_event()   { return false; }
   virtual bool is_vm_object_alloc_event(){ return false; }
+  virtual bool is_sampled_object_alloc_event(){ return false; }
   JvmtiEventCollector *get_prev()        { return _prev; }
 };
 
@@ -462,42 +479,67 @@
 
 };
 
-// Used to record vm internally allocated object oops and post
-// vm object alloc event for objects visible to java world.
-// Constructor enables JvmtiThreadState flag and all vm allocated
-// objects are recorded in a growable array. When destructor is
-// called the vm object alloc event is posted for each objects
-// visible to java world.
-// See jvm.cpp file for its usage.
+// Used as a base class for object allocation collection and then posting
+// the allocations to any event notification callbacks.
 //
-class JvmtiVMObjectAllocEventCollector : public JvmtiEventCollector {
- private:
-  GrowableArray<oop>* _allocated; // field to record vm internally allocated object oop.
-  bool _enable;                   // This flag is enabled in constructor and disabled
-                                  // in destructor before posting event. To avoid
+class JvmtiObjectAllocEventCollector : public JvmtiEventCollector {
+ protected:
+  GrowableArray<oop>* _allocated;      // field to record collected allocated object oops.
+  bool _enable;                   // This flag is enabled in constructor if set up in the thread state
+                                  // and disabled in destructor before posting event. To avoid
                                   // collection of objects allocated while running java code inside
-                                  // agent post_vm_object_alloc() event handler.
+                                  // agent post_X_object_alloc() event handler.
+  void (*_post_callback)(JavaThread*, oop); // what callback to use when destroying the collector.
 
   //GC support
   void oops_do(OopClosure* f);
 
   friend class JvmtiExport;
-  // Record vm allocated object oop.
+
+  // Record allocated object oop.
   inline void record_allocation(oop obj);
 
   //GC support
   static void oops_do_for_all_threads(OopClosure* f);
 
  public:
-  JvmtiVMObjectAllocEventCollector()  NOT_JVMTI_RETURN;
-  ~JvmtiVMObjectAllocEventCollector() NOT_JVMTI_RETURN;
-  bool is_vm_object_alloc_event()   { return true; }
+  JvmtiObjectAllocEventCollector()  NOT_JVMTI_RETURN;
+
+  void generate_call_for_allocated();
 
   bool is_enabled()                 { return _enable; }
   void set_enabled(bool on)         { _enable = on; }
 };
 
+// Used to record vm internally allocated object oops and post
+// vm object alloc event for objects visible to java world.
+// Constructor enables JvmtiThreadState flag and all vm allocated
+// objects are recorded in a growable array. When destructor is
+// called the vm object alloc event is posted for each object
+// visible to java world.
+// See jvm.cpp file for its usage.
+//
+class JvmtiVMObjectAllocEventCollector : public JvmtiObjectAllocEventCollector {
+ public:
+  JvmtiVMObjectAllocEventCollector()  NOT_JVMTI_RETURN;
+  ~JvmtiVMObjectAllocEventCollector()  NOT_JVMTI_RETURN;
+  virtual bool is_vm_object_alloc_event()   { return true; }
+};
 
+// Used to record sampled allocated object oops and post
+// sampled object alloc event.
+// Constructor enables JvmtiThreadState flag and all sampled allocated
+// objects are recorded in a growable array. When destructor is
+// called the sampled object alloc event is posted for each sampled object.
+// See jvm.cpp file for its usage.
+//
+class JvmtiSampledObjectAllocEventCollector : public JvmtiObjectAllocEventCollector {
+ public:
+  JvmtiSampledObjectAllocEventCollector()  NOT_JVMTI_RETURN;
+  ~JvmtiSampledObjectAllocEventCollector()  NOT_JVMTI_RETURN;
+  bool is_sampled_object_alloc_event()    { return true; }
+  static bool object_alloc_is_safe_to_sample();
+};
 
 // Marker class to disable the posting of VMObjectAlloc events
 // within its scope.
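
The refactoring above extracts JvmtiObjectAllocEventCollector so both collectors share HotSpot's scoped StackObj pattern: the constructor registers the collector in the JvmtiThreadState, record_allocation() accumulates oops while the allocation path runs, and the destructor posts the events only after that path has unwound. Stripped of the VM types, the shape is roughly this (a toy sketch; ScopedAllocCollector and post_event are hypothetical names):

#include <cstdio>
#include <vector>

// Toy rendering of the collector shape: record while in scope, post
// everything from the destructor so callbacks never run in the middle
// of the allocation path.
class ScopedAllocCollector {
 public:
  typedef void (*PostFn)(const char* what);  // stand-in for _post_callback

  explicit ScopedAllocCollector(PostFn post) : _post(post) {}
  ~ScopedAllocCollector() {
    for (size_t i = 0; i < _recorded.size(); i++) {
      _post(_recorded[i]);                   // deferred posting
    }
  }
  void record(const char* what) { _recorded.push_back(what); }

 private:
  std::vector<const char*> _recorded;
  PostFn _post;
};

static void post_event(const char* what) { std::printf("post %s\n", what); }

int main() {
  {
    ScopedAllocCollector c(&post_event);
    c.record("sampled-oop");  // analogous to record_allocation(obj)
  }                           // events are posted here, as in ~collector
  return 0;
}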
--- a/src/hotspot/share/prims/jvmtiManageCapabilities.cpp	Mon Jun 11 15:28:24 2018 +0200
+++ b/src/hotspot/share/prims/jvmtiManageCapabilities.cpp	Fri Jun 15 00:49:54 2018 -0700
@@ -130,6 +130,7 @@
 
   memset(&jc, 0, sizeof(jc));
   jc.can_suspend = 1;
+  jc.can_generate_sampled_object_alloc_events = 1;
   return jc;
 }
 
@@ -410,6 +411,8 @@
     log_trace(jvmti)("can_generate_frame_pop_events");
   if (cap->can_generate_breakpoint_events)
     log_trace(jvmti)("can_generate_breakpoint_events");
+  if (cap->can_generate_sampled_object_alloc_events)
+    log_trace(jvmti)("can_generate_sampled_object_alloc_events");
   if (cap->can_suspend)
     log_trace(jvmti)("can_suspend");
   if (cap->can_redefine_any_class )
--- a/src/hotspot/share/prims/jvmtiThreadState.cpp	Mon Jun 11 15:28:24 2018 +0200
+++ b/src/hotspot/share/prims/jvmtiThreadState.cpp	Fri Jun 15 00:49:54 2018 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -60,6 +60,7 @@
   _head_env_thread_state = NULL;
   _dynamic_code_event_collector = NULL;
   _vm_object_alloc_event_collector = NULL;
+  _sampled_object_alloc_event_collector = NULL;
   _the_class_for_redefinition_verification = NULL;
   _scratch_class_for_redefinition_verification = NULL;
   _cur_stack_depth = UNKNOWN_STACK_DEPTH;
--- a/src/hotspot/share/prims/jvmtiThreadState.hpp	Mon Jun 11 15:28:24 2018 +0200
+++ b/src/hotspot/share/prims/jvmtiThreadState.hpp	Fri Jun 15 00:49:54 2018 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -113,6 +113,8 @@
   JvmtiDynamicCodeEventCollector* _dynamic_code_event_collector;
   // holds the current vm object alloc event collector, NULL if no event collector in use
   JvmtiVMObjectAllocEventCollector* _vm_object_alloc_event_collector;
+  // holds the current sampled object alloc event collector, NULL if no event collector in use
+  JvmtiSampledObjectAllocEventCollector* _sampled_object_alloc_event_collector;
 
   // Should only be created by factory methods
   JvmtiThreadState(JavaThread *thread);
@@ -314,12 +316,18 @@
   JvmtiVMObjectAllocEventCollector* get_vm_object_alloc_event_collector() {
     return _vm_object_alloc_event_collector;
   }
+  JvmtiSampledObjectAllocEventCollector* get_sampled_object_alloc_event_collector() {
+    return _sampled_object_alloc_event_collector;
+  }
   void set_dynamic_code_event_collector(JvmtiDynamicCodeEventCollector* collector) {
     _dynamic_code_event_collector = collector;
   }
   void set_vm_object_alloc_event_collector(JvmtiVMObjectAllocEventCollector* collector) {
     _vm_object_alloc_event_collector = collector;
   }
+  void set_sampled_object_alloc_event_collector(JvmtiSampledObjectAllocEventCollector* collector) {
+    _sampled_object_alloc_event_collector = collector;
+  }
 
 
   //
--- a/src/hotspot/share/runtime/mutexLocker.cpp	Mon Jun 11 15:28:24 2018 +0200
+++ b/src/hotspot/share/runtime/mutexLocker.cpp	Fri Jun 15 00:49:54 2018 -0700
@@ -131,6 +131,8 @@
 Monitor* PeriodicTask_lock            = NULL;
 Monitor* RedefineClasses_lock         = NULL;
 
+Mutex*   ThreadHeapSampler_lock       = NULL;
+
 #if INCLUDE_JFR
 Mutex*   JfrStacktrace_lock           = NULL;
 Monitor* JfrMsg_lock                  = NULL;
@@ -296,6 +298,9 @@
   def(CompileThread_lock           , PaddedMonitor, nonleaf+5,   false, Monitor::_safepoint_check_always);
   def(PeriodicTask_lock            , PaddedMonitor, nonleaf+5,   true,  Monitor::_safepoint_check_sometimes);
   def(RedefineClasses_lock         , PaddedMonitor, nonleaf+5,   true,  Monitor::_safepoint_check_always);
+
+  def(ThreadHeapSampler_lock       , PaddedMutex,   nonleaf,     false, Monitor::_safepoint_check_never);
+
   if (WhiteBoxAPI) {
     def(Compilation_lock           , PaddedMonitor, leaf,        false, Monitor::_safepoint_check_never);
   }
--- a/src/hotspot/share/runtime/mutexLocker.hpp	Mon Jun 11 15:28:24 2018 +0200
+++ b/src/hotspot/share/runtime/mutexLocker.hpp	Fri Jun 15 00:49:54 2018 -0700
@@ -130,6 +130,7 @@
 extern Monitor* Service_lock;                    // a lock used for service thread operation
 extern Monitor* PeriodicTask_lock;               // protects the periodic task structure
 extern Monitor* RedefineClasses_lock;            // locks classes from parallel redefinition
+extern Mutex*   ThreadHeapSampler_lock;          // protects ThreadHeapSampler's static initialization data.
 
 #if INCLUDE_JFR
 extern Mutex*   JfrStacktrace_lock;              // used to guard access to the JFR stacktrace table
--- a/src/hotspot/share/runtime/thread.hpp	Mon Jun 11 15:28:24 2018 +0200
+++ b/src/hotspot/share/runtime/thread.hpp	Fri Jun 15 00:49:54 2018 -0700
@@ -42,6 +42,7 @@
 #include "runtime/park.hpp"
 #include "runtime/safepoint.hpp"
 #include "runtime/stubRoutines.hpp"
+#include "runtime/threadHeapSampler.hpp"
 #include "runtime/threadLocalStorage.hpp"
 #include "runtime/unhandledOops.hpp"
 #include "utilities/align.hpp"
@@ -338,6 +339,7 @@
   ThreadLocalAllocBuffer _tlab;                 // Thread-local eden
   jlong _allocated_bytes;                       // Cumulative number of bytes allocated on
                                                 // the Java heap
+  ThreadHeapSampler _heap_sampler;              // Per-thread state for heap allocation sampling.
 
   JFR_ONLY(DEFINE_THREAD_LOCAL_FIELD_JFR;)      // Thread-local data for jfr
 
@@ -517,6 +519,8 @@
   void incr_allocated_bytes(jlong size) { _allocated_bytes += size; }
   inline jlong cooked_allocated_bytes();
 
+  ThreadHeapSampler& heap_sampler()     { return _heap_sampler; }
+
   JFR_ONLY(DEFINE_THREAD_LOCAL_ACCESSOR_JFR;)
 
   bool is_trace_suspend()               { return (_suspend_flags & _trace_flag) != 0; }