# HG changeset patch # User jcbeyler # Date 1529048994 25200 # Node ID e2a7f431f65ccfb4af8102afd611726f23a345de # Parent bf7e2684cd0a69200b58964bcfb9736e2d914617 8203394: Implementation of JEP 331: Low-Overhead Heap Profiling Summary: Implement Low-Overhead Heap Profiling Reviewed-by: eosterlund, gthornbr, rehn, sspitsyn, tschatzl diff -r bf7e2684cd0a -r e2a7f431f65c make/nb_native/nbproject/configurations.xml --- a/make/nb_native/nbproject/configurations.xml Mon Jun 11 15:28:24 2018 +0200 +++ b/make/nb_native/nbproject/configurations.xml Fri Jun 15 00:49:54 2018 -0700 @@ -6153,6 +6153,9 @@ libIsModifiableModuleTest.c + + libHeapMonitorTest.c + libMAAClassFileLoadHook.c @@ -40154,6 +40157,11 @@ tool="0" flavor2="0"> + + heap_sampler().sampling_collector_present(), + "Sampling collector not present."); + + if (ThreadHeapSampler::enabled()) { + // Try to allocate the sampled object from TLAB, it is possible a sample + // point was put and the TLAB still has space. + obj = THREAD->tlab().allocate_sampled_object(size); + + if (obj != NULL) { + return obj; + } + } + ThreadLocalAllocBuffer& tlab = THREAD->tlab(); // Retain tlab and allocate object in shared space if @@ -401,7 +422,7 @@ // between minimal and new_tlab_size is accepted. size_t actual_tlab_size = 0; size_t min_tlab_size = ThreadLocalAllocBuffer::compute_min_size(size); - HeapWord* obj = Universe::heap()->allocate_new_tlab(min_tlab_size, new_tlab_size, &actual_tlab_size); + obj = Universe::heap()->allocate_new_tlab(min_tlab_size, new_tlab_size, &actual_tlab_size); if (obj == NULL) { assert(actual_tlab_size == 0, "Allocation failed, but actual size was updated. min: " SIZE_FORMAT ", desired: " SIZE_FORMAT ", actual: " SIZE_FORMAT, min_tlab_size, new_tlab_size, actual_tlab_size); @@ -425,6 +446,14 @@ Copy::fill_to_words(obj + hdr_size, actual_tlab_size - hdr_size, badHeapWordVal); #endif // ASSERT } + + // Send the thread information about this allocation in case a sample is + // requested. + if (ThreadHeapSampler::enabled()) { + size_t tlab_bytes_since_last_sample = THREAD->tlab().bytes_since_last_sample_point(); + THREAD->heap_sampler().check_for_sampling(obj, size, tlab_bytes_since_last_sample); + } + tlab.fill(obj, obj + size, actual_tlab_size); return obj; } diff -r bf7e2684cd0a -r e2a7f431f65c src/hotspot/share/gc/shared/collectedHeap.hpp --- a/src/hotspot/share/gc/shared/collectedHeap.hpp Mon Jun 11 15:28:24 2018 +0200 +++ b/src/hotspot/share/gc/shared/collectedHeap.hpp Fri Jun 15 00:49:54 2018 -0700 @@ -194,6 +194,18 @@ virtual void trace_heap(GCWhen::Type when, const GCTracer* tracer); + // Internal allocation methods. + inline static HeapWord* common_allocate_memory(Klass* klass, int size, + void (*post_setup)(Klass*, HeapWord*, int), + int size_for_post, bool init_memory, + TRAPS); + + // Internal allocation method for common obj/class/array allocations. 
+ inline static HeapWord* allocate_memory(Klass* klass, int size, + void (*post_setup)(Klass*, HeapWord*, int), + int size_for_post, bool init_memory, + TRAPS); + // Verification functions virtual void check_for_bad_heap_word_value(HeapWord* addr, size_t size) PRODUCT_RETURN; diff -r bf7e2684cd0a -r e2a7f431f65c src/hotspot/share/gc/shared/collectedHeap.inline.hpp --- a/src/hotspot/share/gc/shared/collectedHeap.inline.hpp Mon Jun 11 15:28:24 2018 +0200 +++ b/src/hotspot/share/gc/shared/collectedHeap.inline.hpp Fri Jun 15 00:49:54 2018 -0700 @@ -34,6 +34,7 @@ #include "oops/oop.inline.hpp" #include "prims/jvmtiExport.hpp" #include "runtime/sharedRuntime.hpp" +#include "runtime/handles.inline.hpp" #include "runtime/thread.inline.hpp" #include "services/lowMemoryDetector.hpp" #include "utilities/align.hpp" @@ -200,9 +201,15 @@ NOT_PRODUCT(Universe::heap()->check_for_non_bad_heap_word_value(result, size)); assert(!HAS_PENDING_EXCEPTION, "Unexpected exception, will result in uninitialized storage"); - THREAD->incr_allocated_bytes(size * HeapWordSize); + size_t size_in_bytes = size * HeapWordSize; + THREAD->incr_allocated_bytes(size_in_bytes); + + AllocTracer::send_allocation_outside_tlab(klass, result, size_in_bytes, THREAD); - AllocTracer::send_allocation_outside_tlab(klass, result, size * HeapWordSize, THREAD); + if (ThreadHeapSampler::enabled()) { + THREAD->heap_sampler().check_for_sampling(result, size_in_bytes); + } + return result; } @@ -214,12 +221,58 @@ Copy::fill_to_aligned_words(obj + hs, size - hs); } +HeapWord* CollectedHeap::common_allocate_memory(Klass* klass, int size, + void (*post_setup)(Klass*, HeapWord*, int), + int size_for_post, bool init_memory, + TRAPS) { + HeapWord* obj; + if (init_memory) { + obj = common_mem_allocate_init(klass, size, CHECK_NULL); + } else { + obj = common_mem_allocate_noinit(klass, size, CHECK_NULL); + } + post_setup(klass, obj, size_for_post); + return obj; +} + +HeapWord* CollectedHeap::allocate_memory(Klass* klass, int size, + void (*post_setup)(Klass*, HeapWord*, int), + int size_for_post, bool init_memory, + TRAPS) { + HeapWord* obj; + + assert(JavaThread::current()->heap_sampler().add_sampling_collector(), + "Should never return false."); + + if (JvmtiExport::should_post_sampled_object_alloc()) { + HandleMark hm(THREAD); + Handle obj_h; + { + JvmtiSampledObjectAllocEventCollector collector; + obj = common_allocate_memory(klass, size, post_setup, size_for_post, + init_memory, CHECK_NULL); + // If we want to be sampling, protect the allocated object with a Handle + // before doing the callback. The callback is done in the destructor of + // the JvmtiSampledObjectAllocEventCollector. 
+ obj_h = Handle(THREAD, (oop) obj); + } + obj = (HeapWord*) obj_h(); + } else { + obj = common_allocate_memory(klass, size, post_setup, size_for_post, + init_memory, CHECK_NULL); + } + + assert(JavaThread::current()->heap_sampler().remove_sampling_collector(), + "Should never return false."); + return obj; +} + oop CollectedHeap::obj_allocate(Klass* klass, int size, TRAPS) { debug_only(check_for_valid_allocation_state()); assert(!Universe::heap()->is_gc_active(), "Allocation during gc not allowed"); assert(size >= 0, "int won't convert to size_t"); - HeapWord* obj = common_mem_allocate_init(klass, size, CHECK_NULL); - post_allocation_setup_obj(klass, obj, size); + HeapWord* obj = allocate_memory(klass, size, post_allocation_setup_obj, + size, true, CHECK_NULL); NOT_PRODUCT(Universe::heap()->check_for_bad_heap_word_value(obj, size)); return (oop)obj; } @@ -228,8 +281,8 @@ debug_only(check_for_valid_allocation_state()); assert(!Universe::heap()->is_gc_active(), "Allocation during gc not allowed"); assert(size >= 0, "int won't convert to size_t"); - HeapWord* obj = common_mem_allocate_init(klass, size, CHECK_NULL); - post_allocation_setup_class(klass, obj, size); // set oop_size + HeapWord* obj = allocate_memory(klass, size, post_allocation_setup_class, + size, true, CHECK_NULL); NOT_PRODUCT(Universe::heap()->check_for_bad_heap_word_value(obj, size)); return (oop)obj; } @@ -241,8 +294,8 @@ debug_only(check_for_valid_allocation_state()); assert(!Universe::heap()->is_gc_active(), "Allocation during gc not allowed"); assert(size >= 0, "int won't convert to size_t"); - HeapWord* obj = common_mem_allocate_init(klass, size, CHECK_NULL); - post_allocation_setup_array(klass, obj, length); + HeapWord* obj = allocate_memory(klass, size, post_allocation_setup_array, + length, true, CHECK_NULL); NOT_PRODUCT(Universe::heap()->check_for_bad_heap_word_value(obj, size)); return (oop)obj; } @@ -254,9 +307,9 @@ debug_only(check_for_valid_allocation_state()); assert(!Universe::heap()->is_gc_active(), "Allocation during gc not allowed"); assert(size >= 0, "int won't convert to size_t"); - HeapWord* obj = common_mem_allocate_noinit(klass, size, CHECK_NULL); - ((oop)obj)->set_klass_gap(0); - post_allocation_setup_array(klass, obj, length); + + HeapWord* obj = allocate_memory(klass, size, post_allocation_setup_array, + length, false, CHECK_NULL); #ifndef PRODUCT const size_t hs = oopDesc::header_size()+1; Universe::heap()->check_for_non_bad_heap_word_value(obj+hs, size-hs); diff -r bf7e2684cd0a -r e2a7f431f65c src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp --- a/src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp Mon Jun 11 15:28:24 2018 +0200 +++ b/src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp Fri Jun 15 00:49:54 2018 -0700 @@ -45,6 +45,14 @@ make_parsable(true); // also retire the TLAB } +size_t ThreadLocalAllocBuffer::remaining() { + if (end() == NULL) { + return 0; + } + + return pointer_delta(hard_end(), top()); +} + void ThreadLocalAllocBuffer::accumulate_statistics_before_gc() { global_stats()->initialize(); @@ -121,10 +129,12 @@ set_top(NULL); set_pf_top(NULL); set_end(NULL); + set_allocation_end(NULL); } } assert(!(retire || ZeroTLAB) || - (start() == NULL && end() == NULL && top() == NULL), + (start() == NULL && end() == NULL && top() == NULL && + _allocation_end == NULL), "TLAB must be reset"); } @@ -172,8 +182,13 @@ _allocated_size += new_size; print_stats("fill"); assert(top <= start + new_size - alignment_reserve(), "size too small"); + initialize(start, top, start + new_size - 
alignment_reserve()); + if (ThreadHeapSampler::enabled()) { + set_sample_end(); + } + // Reset amount of internal fragmentation set_refill_waste_limit(initial_refill_waste_limit()); } @@ -185,6 +200,7 @@ set_top(top); set_pf_top(top); set_end(end); + set_allocation_end(end); invariants(); } @@ -306,12 +322,45 @@ guarantee(p == top(), "end of last object must match end of space"); } +void ThreadLocalAllocBuffer::set_sample_end() { + size_t heap_words_remaining = pointer_delta(_end, _top); + size_t bytes_until_sample = myThread()->heap_sampler().bytes_until_sample(); + size_t words_until_sample = bytes_until_sample / HeapWordSize;; + + if (heap_words_remaining > words_until_sample) { + HeapWord* new_end = _top + words_until_sample; + set_end(new_end); + _bytes_since_last_sample_point = bytes_until_sample; + } else { + _bytes_since_last_sample_point = heap_words_remaining * HeapWordSize;; + } +} + Thread* ThreadLocalAllocBuffer::myThread() { return (Thread*)(((char *)this) + in_bytes(start_offset()) - in_bytes(Thread::tlab_start_offset())); } +void ThreadLocalAllocBuffer::set_back_allocation_end() { + _end = _allocation_end; +} + +HeapWord* ThreadLocalAllocBuffer::allocate_sampled_object(size_t size) { + set_back_allocation_end(); + HeapWord* result = allocate(size); + + if (result) { + myThread()->heap_sampler().check_for_sampling(result, size * HeapWordSize, _bytes_since_last_sample_point); + set_sample_end(); + } + + return result; +} + +HeapWord* ThreadLocalAllocBuffer::hard_end() { + return _allocation_end + alignment_reserve(); +} GlobalTLABStats::GlobalTLABStats() : _allocating_threads_avg(TLABAllocationWeight) { diff -r bf7e2684cd0a -r e2a7f431f65c src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp --- a/src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp Mon Jun 11 15:28:24 2018 +0200 +++ b/src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp Fri Jun 15 00:49:54 2018 -0700 @@ -37,6 +37,12 @@ // It is thread-private at any time, but maybe multiplexed over // time across multiple threads. The park()/unpark() pair is // used to make it available for such multiplexing. +// +// Heap sampling is performed via the end and allocation_end +// fields. +// allocation_end contains the real end of the tlab allocation, +// whereas end can be set to an arbitrary spot in the tlab to +// trip the return and sample the allocation. class ThreadLocalAllocBuffer: public CHeapObj { friend class VMStructs; friend class JVMCIVMStructs; @@ -44,10 +50,13 @@ HeapWord* _start; // address of TLAB HeapWord* _top; // address after last allocation HeapWord* _pf_top; // allocation prefetch watermark - HeapWord* _end; // allocation end (excluding alignment_reserve) + HeapWord* _end; // allocation end (can be the sampling end point or _allocation_end) + HeapWord* _allocation_end; // end for allocations (actual TLAB end, excluding alignment_reserve) + size_t _desired_size; // desired size (including alignment_reserve) size_t _refill_waste_limit; // hold onto tlab if free() is larger than this size_t _allocated_before_last_gc; // total bytes allocated up until the last gc + size_t _bytes_since_last_sample_point; // bytes since last sample point. 
static size_t _max_size; // maximum size of any TLAB static int _reserve_for_allocation_prefetch; // Reserve at the end of the TLAB @@ -67,6 +76,7 @@ void set_start(HeapWord* start) { _start = start; } void set_end(HeapWord* end) { _end = end; } + void set_allocation_end(HeapWord* ptr) { _allocation_end = ptr; } void set_top(HeapWord* top) { _top = top; } void set_pf_top(HeapWord* pf_top) { _pf_top = pf_top; } void set_desired_size(size_t desired_size) { _desired_size = desired_size; } @@ -77,7 +87,7 @@ static int target_refills() { return _target_refills; } size_t initial_desired_size(); - size_t remaining() const { return end() == NULL ? 0 : pointer_delta(hard_end(), top()); } + size_t remaining(); bool is_last_allocation(HeapWord* obj, size_t size) { return pointer_delta(top(), obj) == size; } @@ -118,8 +128,8 @@ HeapWord* start() const { return _start; } HeapWord* end() const { return _end; } - HeapWord* hard_end() const { return _end + alignment_reserve(); } HeapWord* top() const { return _top; } + HeapWord* hard_end(); HeapWord* pf_top() const { return _pf_top; } size_t desired_size() const { return _desired_size; } size_t used() const { return pointer_delta(top(), start()); } @@ -127,9 +137,11 @@ size_t free() const { return pointer_delta(end(), top()); } // Don't discard tlab if remaining space is larger than this. size_t refill_waste_limit() const { return _refill_waste_limit; } + size_t bytes_since_last_sample_point() const { return _bytes_since_last_sample_point; } // Allocate size HeapWords. The memory is NOT initialized to zero. inline HeapWord* allocate(size_t size); + HeapWord* allocate_sampled_object(size_t size); // Undo last allocation. inline bool undo_allocate(HeapWord* obj, size_t size); @@ -171,6 +183,9 @@ void fill(HeapWord* start, HeapWord* top, size_t new_size); void initialize(); + void set_back_allocation_end(); + void set_sample_end(); + static size_t refill_waste_limit_increment() { return TLABWasteIncrement; } template void addresses_do(T f) { @@ -178,6 +193,7 @@ f(&_top); f(&_pf_top); f(&_end); + f(&_allocation_end); } // Code generation support diff -r bf7e2684cd0a -r e2a7f431f65c src/hotspot/share/prims/jvmti.xml --- a/src/hotspot/share/prims/jvmti.xml Mon Jun 11 15:28:24 2018 +0200 +++ b/src/hotspot/share/prims/jvmti.xml Fri Jun 15 00:49:54 2018 -0700 @@ -10353,6 +10353,14 @@ See . + + + Can generate sampled allocation events. + If this capability is enabled then the heap sampling method + can be + called and events can be generated. + + @@ -11531,6 +11539,47 @@ + + + Set Heap Sampling Rate + + Generate a event when objects are allocated. + Each thread keeps a counter of bytes allocated. The event will only be generated + when that counter exceeds an average of + since the last sample. +
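The per-thread counter described above can be pictured with a minimal sketch; the type and member names below are illustrative only and do not correspond to the actual HotSpot ThreadHeapSampler implementation.

#include <cstddef>

// Illustrative model of the per-thread byte budget: each allocation consumes
// part of the budget, and the allocation that exhausts it becomes the sample.
struct ThreadSamplerModel {
  size_t bytes_until_sample;                    // bytes left before the next sample point

  bool should_sample(size_t allocation_size) {
    if (allocation_size >= bytes_until_sample) {
      return true;                              // budget exhausted: report this object
    }
    bytes_until_sample -= allocation_size;      // otherwise just consume the budget
    return false;
  }
};

After a sample is taken, the budget is re-armed with a freshly drawn interval, as discussed in the SampledObjectAlloc event description below.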

+ Setting sampling_rate to 0 will cause an event to be + generated by each allocation supported by the system. + new + + + + + + + + The sampling rate in bytes. The sampler uses a statistical approach to + generate an event, on average, once for every sampling_rate bytes of + memory allocated by a given thread. +

+ Passing 0 as a sampling rate generates a sample for every allocation. +
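For example, an agent could choose between the two extremes as follows; the jvmti pointer and the helper name are assumptions for illustration.

#include <jvmti.h>

// Assumes a valid jvmtiEnv* obtained in Agent_OnLoad.
static void configure_sampling(jvmtiEnv* jvmti, bool sample_everything) {
  if (sample_everything) {
    jvmti->SetHeapSamplingRate(0);            // a sample for every allocation (highest overhead)
  } else {
    jvmti->SetHeapSamplingRate(512 * 1024);   // the default 512KB average
  }
}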

+ Note: The overhead of this feature is directly correlated with the sampling rate. + A high sampling rate, such as 1024 bytes, will incur a high overhead. + A lower rate, such as 1024KB, will have a much lower overhead. Sampling should only + be used with an understanding that it may impact performance. + + + + + + is less than zero. + + + + + @@ -13495,13 +13544,13 @@ - JNI local reference to the object that was allocated + JNI local reference to the object that was allocated. - JNI local reference to the class of the object + JNI local reference to the class of the object. @@ -13513,8 +13562,75 @@ + + + Sent when an allocated object is sampled. + By default, the sampling rate is a geometric variable with a 512KB mean. + Each thread tracks how many bytes it has allocated since it sent the last event. + When the number of bytes exceeds the sampling rate, it will send another event. + This implies that, on average, one object will be sampled every time a thread has + allocated 512KB bytes since the last sample. +

+ Note that this is a geometric variable: it will not sample every 512KB precisely. + The goal is to ensure high-quality sampling even if allocation happens in a + fixed pattern (i.e., the same set of objects is allocated every 512KB). +
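A common way to realize such a geometric variable is to draw each inter-sample interval from an exponential distribution with the configured mean; the sketch below illustrates the idea and is not the exact HotSpot algorithm.

#include <cmath>
#include <cstddef>
#include <random>

// Draw the number of bytes until the next sample so that sample points follow
// an (approximately) geometric law with the requested mean.
size_t pick_bytes_until_next_sample(size_t mean_bytes, std::mt19937_64& rng) {
  std::uniform_real_distribution<double> uniform(0.0, 1.0);
  double u = uniform(rng);
  if (u <= 0.0) {
    u = 1e-12;                                  // guard against log(0)
  }
  return static_cast<size_t>(-std::log(u) * static_cast<double>(mean_bytes)) + 1;
}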

+ If another sampling rate is required, the user can call SetHeapSamplingRate + with a strictly positive integer value, representing the new sampling rate. +

+ This event is sent once the sampled allocation has been performed. It provides the object, the stack trace + of the allocation, the allocating thread, the size of the allocation, and the object's class. +
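In terms of the JVMTI callback table, the values listed above map onto the SampledObjectAlloc handler as follows; the handler name and empty body are placeholders.

#include <jvmti.h>

// Parameters match the event description: JNI environment, allocating thread,
// sampled object, its class, and its size in bytes.
static void JNICALL OnSampledObjectAlloc(jvmtiEnv* jvmti, JNIEnv* jni,
                                         jthread thread, jobject object,
                                         jclass object_klass, jlong size) {
  // A real agent would typically tag the object or record a stack trace here.
}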

+ A typical use case of this system is to determine where heap allocations originate. + In conjunction with weak references and the function + , a user can track which objects were allocated from which + stack trace, and which are still live during the execution of the program. + + new + + + + + + + JNIEnv + + + The JNI environment of the event (current) thread. + + + + + + Thread allocating the object. + + + + + + JNI local reference to the object that was allocated. + + + + + + JNI local reference to the class of the object + + + + + + Size of the object (in bytes). See . + + + + + + id="ObjectFree" const="JVMTI_EVENT_OBJECT_FREE" num="83"> An Object Free event is sent when the garbage collector frees an object. Events are only sent for tagged objects--see @@ -13534,7 +13650,7 @@ The freed object's tag - + diff -r bf7e2684cd0a -r e2a7f431f65c src/hotspot/share/prims/jvmtiEnv.cpp --- a/src/hotspot/share/prims/jvmtiEnv.cpp Mon Jun 11 15:28:24 2018 +0200 +++ b/src/hotspot/share/prims/jvmtiEnv.cpp Fri Jun 15 00:49:54 2018 -0700 @@ -64,6 +64,7 @@ #include "runtime/reflectionUtils.hpp" #include "runtime/signature.hpp" #include "runtime/thread.inline.hpp" +#include "runtime/threadHeapSampler.hpp" #include "runtime/threadSMR.hpp" #include "runtime/timerTrace.hpp" #include "runtime/vframe.inline.hpp" @@ -537,10 +538,17 @@ if (event_type == JVMTI_EVENT_CLASS_FILE_LOAD_HOOK && enabled) { record_class_file_load_hook_enabled(); } + + if (event_type == JVMTI_EVENT_SAMPLED_OBJECT_ALLOC) { + if (enabled) { + ThreadHeapSampler::enable(); + } else { + ThreadHeapSampler::disable(); + } + } JvmtiEventController::set_user_enabled(this, (JavaThread*) NULL, event_type, enabled); } else { // We have a specified event_thread. - JavaThread* java_thread = NULL; ThreadsListHandle tlh; jvmtiError err = JvmtiExport::cv_external_thread_to_JavaThread(tlh.list(), event_thread, &java_thread, NULL); @@ -3631,6 +3639,15 @@ return JVMTI_ERROR_NONE; } /* end GetAvailableProcessors */ +jvmtiError +JvmtiEnv::SetHeapSamplingRate(jint sampling_rate) { + if (sampling_rate < 0) { + return JVMTI_ERROR_ILLEGAL_ARGUMENT; + } + ThreadHeapSampler::set_sampling_rate(sampling_rate); + return JVMTI_ERROR_NONE; +} /* end SetHeapSamplingRate */ + // // System Properties functions // diff -r bf7e2684cd0a -r e2a7f431f65c src/hotspot/share/prims/jvmtiEventController.cpp --- a/src/hotspot/share/prims/jvmtiEventController.cpp Mon Jun 11 15:28:24 2018 +0200 +++ b/src/hotspot/share/prims/jvmtiEventController.cpp Fri Jun 15 00:49:54 2018 -0700 @@ -84,6 +84,7 @@ static const jlong OBJECT_FREE_BIT = (((jlong)1) << (JVMTI_EVENT_OBJECT_FREE - TOTAL_MIN_EVENT_TYPE_VAL)); static const jlong RESOURCE_EXHAUSTED_BIT = (((jlong)1) << (JVMTI_EVENT_RESOURCE_EXHAUSTED - TOTAL_MIN_EVENT_TYPE_VAL)); static const jlong VM_OBJECT_ALLOC_BIT = (((jlong)1) << (JVMTI_EVENT_VM_OBJECT_ALLOC - TOTAL_MIN_EVENT_TYPE_VAL)); +static const jlong SAMPLED_OBJECT_ALLOC_BIT = (((jlong)1) << (JVMTI_EVENT_SAMPLED_OBJECT_ALLOC - TOTAL_MIN_EVENT_TYPE_VAL)); // bits for extension events static const jlong CLASS_UNLOAD_BIT = (((jlong)1) << (EXT_EVENT_CLASS_UNLOAD - TOTAL_MIN_EVENT_TYPE_VAL)); @@ -620,6 +621,7 @@ JvmtiExport::set_should_post_compiled_method_load((any_env_thread_enabled & COMPILED_METHOD_LOAD_BIT) != 0); JvmtiExport::set_should_post_compiled_method_unload((any_env_thread_enabled & COMPILED_METHOD_UNLOAD_BIT) != 0); JvmtiExport::set_should_post_vm_object_alloc((any_env_thread_enabled & VM_OBJECT_ALLOC_BIT) != 0); + 
JvmtiExport::set_should_post_sampled_object_alloc((any_env_thread_enabled & SAMPLED_OBJECT_ALLOC_BIT) != 0); // need this if we want thread events or we need them to init data JvmtiExport::set_should_post_thread_life((any_env_thread_enabled & NEED_THREAD_LIFE_EVENTS) != 0); diff -r bf7e2684cd0a -r e2a7f431f65c src/hotspot/share/prims/jvmtiExport.cpp --- a/src/hotspot/share/prims/jvmtiExport.cpp Mon Jun 11 15:28:24 2018 +0200 +++ b/src/hotspot/share/prims/jvmtiExport.cpp Fri Jun 15 00:49:54 2018 -0700 @@ -1028,12 +1028,12 @@ return k; } -class JvmtiVMObjectAllocEventMark : public JvmtiClassEventMark { +class JvmtiObjectAllocEventMark : public JvmtiClassEventMark { private: jobject _jobj; jlong _size; public: - JvmtiVMObjectAllocEventMark(JavaThread *thread, oop obj) : JvmtiClassEventMark(thread, oop_to_klass(obj)) { + JvmtiObjectAllocEventMark(JavaThread *thread, oop obj) : JvmtiClassEventMark(thread, oop_to_klass(obj)) { _jobj = (jobject)to_jobject(obj); _size = obj->size() * wordSize; }; @@ -1198,6 +1198,7 @@ bool JvmtiExport::_should_post_object_free = false; bool JvmtiExport::_should_post_resource_exhausted = false; bool JvmtiExport::_should_post_vm_object_alloc = false; +bool JvmtiExport::_should_post_sampled_object_alloc = false; bool JvmtiExport::_should_post_on_exceptions = false; //////////////////////////////////////////////////////////////////////////////////////////////// @@ -2280,7 +2281,7 @@ // Can not take safepoint here so can not use state_for to get // jvmti thread state. JvmtiThreadState *state = ((JavaThread*)thread)->jvmti_thread_state(); - if (state != NULL ) { + if (state != NULL) { // state is non NULL when VMObjectAllocEventCollector is enabled. JvmtiVMObjectAllocEventCollector *collector; collector = state->get_vm_object_alloc_event_collector(); @@ -2295,6 +2296,27 @@ } } +// Collect all the sampled allocated objects. +void JvmtiExport::record_sampled_internal_object_allocation(oop obj) { + Thread* thread = Thread::current_or_null(); + if (thread != NULL && thread->is_Java_thread()) { + // Can not take safepoint here. + NoSafepointVerifier no_sfpt; + // Can not take safepoint here so can not use state_for to get + // jvmti thread state. + JvmtiThreadState *state = ((JavaThread*)thread)->jvmti_thread_state(); + if (state != NULL) { + // state is non NULL when SampledObjectAllocEventCollector is enabled. + JvmtiSampledObjectAllocEventCollector *collector; + collector = state->get_sampled_object_alloc_event_collector(); + + if (collector != NULL && collector->is_enabled()) { + collector->record_allocation(obj); + } + } + } +} + void JvmtiExport::post_garbage_collection_finish() { Thread *thread = Thread::current(); // this event is posted from VM-Thread. EVT_TRIG_TRACE(JVMTI_EVENT_GARBAGE_COLLECTION_FINISH, @@ -2484,8 +2506,7 @@ } } - -void JvmtiExport::post_vm_object_alloc(JavaThread *thread, oop object) { +void JvmtiExport::post_vm_object_alloc(JavaThread *thread, oop object) { EVT_TRIG_TRACE(JVMTI_EVENT_VM_OBJECT_ALLOC, ("[%s] Trg vm object alloc triggered", JvmtiTrace::safe_get_thread_name(thread))); if (object == NULL) { @@ -2500,7 +2521,7 @@ JvmtiTrace::safe_get_thread_name(thread), object==NULL? 
"NULL" : object->klass()->external_name())); - JvmtiVMObjectAllocEventMark jem(thread, h()); + JvmtiObjectAllocEventMark jem(thread, h()); JvmtiJavaThreadEventTransition jet(thread); jvmtiEventVMObjectAlloc callback = env->callbacks()->VMObjectAlloc; if (callback != NULL) { @@ -2511,6 +2532,34 @@ } } +void JvmtiExport::post_sampled_object_alloc(JavaThread *thread, oop object) { + EVT_TRIG_TRACE(JVMTI_EVENT_SAMPLED_OBJECT_ALLOC, + ("[%s] Trg sampled object alloc triggered", + JvmtiTrace::safe_get_thread_name(thread))); + if (object == NULL) { + return; + } + HandleMark hm(thread); + Handle h(thread, object); + JvmtiEnvIterator it; + for (JvmtiEnv* env = it.first(); env != NULL; env = it.next(env)) { + if (env->is_enabled(JVMTI_EVENT_SAMPLED_OBJECT_ALLOC)) { + EVT_TRACE(JVMTI_EVENT_SAMPLED_OBJECT_ALLOC, + ("[%s] Evt sampled object alloc sent %s", + JvmtiTrace::safe_get_thread_name(thread), + object == NULL ? "NULL" : object->klass()->external_name())); + + JvmtiObjectAllocEventMark jem(thread, h()); + JvmtiJavaThreadEventTransition jet(thread); + jvmtiEventSampledObjectAlloc callback = env->callbacks()->SampledObjectAlloc; + if (callback != NULL) { + (*callback)(env->jvmti_external(), jem.jni_env(), jem.jni_thread(), + jem.jni_jobject(), jem.jni_class(), jem.size()); + } + } + } +} + //////////////////////////////////////////////////////////////////////////////////////////////// void JvmtiExport::cleanup_thread(JavaThread* thread) { @@ -2536,7 +2585,7 @@ void JvmtiExport::oops_do(OopClosure* f) { JvmtiCurrentBreakpoints::oops_do(f); - JvmtiVMObjectAllocEventCollector::oops_do_for_all_threads(f); + JvmtiObjectAllocEventCollector::oops_do_for_all_threads(f); } void JvmtiExport::weak_oops_do(BoolObjectClosure* is_alive, OopClosure* f) { @@ -2669,12 +2718,28 @@ } else if (is_dynamic_code_event()) { _prev = state->get_dynamic_code_event_collector(); state->set_dynamic_code_event_collector((JvmtiDynamicCodeEventCollector *)this); + } else if (is_sampled_object_alloc_event()) { + JvmtiSampledObjectAllocEventCollector *prev = state->get_sampled_object_alloc_event_collector(); + + if (prev) { + // JvmtiSampledObjectAllocEventCollector wants only one active collector + // enabled. This allows to have a collector detect a user code requiring + // a sample in the callback. + return; + } + state->set_sampled_object_alloc_event_collector((JvmtiSampledObjectAllocEventCollector*) this); } + + _unset_jvmti_thread_state = true; } // Unset current event collection in this thread and reset it with previous // collector. void JvmtiEventCollector::unset_jvmti_thread_state() { + if (!_unset_jvmti_thread_state) { + return; + } + JvmtiThreadState* state = JavaThread::current()->jvmti_thread_state(); if (state != NULL) { // restore the previous event collector (if any) @@ -2685,14 +2750,19 @@ // this thread's jvmti state was created during the scope of // the event collector. } - } else { - if (is_dynamic_code_event()) { - if (state->get_dynamic_code_event_collector() == this) { - state->set_dynamic_code_event_collector((JvmtiDynamicCodeEventCollector *)_prev); - } else { - // this thread's jvmti state was created during the scope of - // the event collector. - } + } else if (is_dynamic_code_event()) { + if (state->get_dynamic_code_event_collector() == this) { + state->set_dynamic_code_event_collector((JvmtiDynamicCodeEventCollector *)_prev); + } else { + // this thread's jvmti state was created during the scope of + // the event collector. 
+ } + } else if (is_sampled_object_alloc_event()) { + if (state->get_sampled_object_alloc_event_collector() == this) { + state->set_sampled_object_alloc_event_collector((JvmtiSampledObjectAllocEventCollector*)_prev); + } else { + // this thread's jvmti state was created during the scope of + // the event collector. } } } @@ -2730,31 +2800,25 @@ } // Setup current thread to record vm allocated objects. -JvmtiVMObjectAllocEventCollector::JvmtiVMObjectAllocEventCollector() : _allocated(NULL) { - if (JvmtiExport::should_post_vm_object_alloc()) { - _enable = true; - setup_jvmti_thread_state(); - } else { - _enable = false; - } +JvmtiObjectAllocEventCollector::JvmtiObjectAllocEventCollector() : + _allocated(NULL), _enable(false), _post_callback(NULL) { } // Post vm_object_alloc event for vm allocated objects visible to java // world. -JvmtiVMObjectAllocEventCollector::~JvmtiVMObjectAllocEventCollector() { - if (_allocated != NULL) { +void JvmtiObjectAllocEventCollector::generate_call_for_allocated() { + if (_allocated) { set_enabled(false); for (int i = 0; i < _allocated->length(); i++) { oop obj = _allocated->at(i); - JvmtiExport::post_vm_object_alloc(JavaThread::current(), obj); + _post_callback(JavaThread::current(), obj); } - delete _allocated; + delete _allocated, _allocated = NULL; } - unset_jvmti_thread_state(); } -void JvmtiVMObjectAllocEventCollector::record_allocation(oop obj) { - assert(is_enabled(), "VM object alloc event collector is not enabled"); +void JvmtiObjectAllocEventCollector::record_allocation(oop obj) { + assert(is_enabled(), "Object alloc event collector is not enabled"); if (_allocated == NULL) { _allocated = new (ResourceObj::C_HEAP, mtInternal) GrowableArray(1, true); } @@ -2762,9 +2826,9 @@ } // GC support. -void JvmtiVMObjectAllocEventCollector::oops_do(OopClosure* f) { - if (_allocated != NULL) { - for(int i=_allocated->length() - 1; i >= 0; i--) { +void JvmtiObjectAllocEventCollector::oops_do(OopClosure* f) { + if (_allocated) { + for(int i = _allocated->length() - 1; i >= 0; i--) { if (_allocated->at(i) != NULL) { f->do_oop(_allocated->adr_at(i)); } @@ -2772,7 +2836,7 @@ } } -void JvmtiVMObjectAllocEventCollector::oops_do_for_all_threads(OopClosure* f) { +void JvmtiObjectAllocEventCollector::oops_do_for_all_threads(OopClosure* f) { // no-op if jvmti not enabled if (!JvmtiEnv::environments_might_exist()) { return; @@ -2781,11 +2845,17 @@ for (JavaThreadIteratorWithHandle jtiwh; JavaThread *jthr = jtiwh.next(); ) { JvmtiThreadState *state = jthr->jvmti_thread_state(); if (state != NULL) { - JvmtiVMObjectAllocEventCollector *collector; + JvmtiObjectAllocEventCollector *collector; collector = state->get_vm_object_alloc_event_collector(); while (collector != NULL) { collector->oops_do(f); - collector = (JvmtiVMObjectAllocEventCollector *)collector->get_prev(); + collector = (JvmtiObjectAllocEventCollector*) collector->get_prev(); + } + + collector = state->get_sampled_object_alloc_event_collector(); + while (collector != NULL) { + collector->oops_do(f); + collector = (JvmtiObjectAllocEventCollector*) collector->get_prev(); } } } @@ -2820,6 +2890,63 @@ } }; +// Setup current thread to record vm allocated objects. 
+JvmtiVMObjectAllocEventCollector::JvmtiVMObjectAllocEventCollector() { + if (JvmtiExport::should_post_vm_object_alloc()) { + _enable = true; + setup_jvmti_thread_state(); + _post_callback = JvmtiExport::post_vm_object_alloc; + } +} + +JvmtiVMObjectAllocEventCollector::~JvmtiVMObjectAllocEventCollector() { + if (_enable) { + generate_call_for_allocated(); + } + unset_jvmti_thread_state(); +} + +bool JvmtiSampledObjectAllocEventCollector::object_alloc_is_safe_to_sample() { + Thread* thread = Thread::current(); + // Really only sample allocations if this is a JavaThread and not the compiler + // thread. + if (!thread->is_Java_thread() || thread->is_Compiler_thread()) { + return false; + } + + if (Compile_lock->owner() == thread || + MultiArray_lock->owner() == thread) { + return false; + } + return true; +} + +// Setup current thread to record sampled allocated objects. +JvmtiSampledObjectAllocEventCollector::JvmtiSampledObjectAllocEventCollector() { + if (JvmtiExport::should_post_sampled_object_alloc()) { + if (!object_alloc_is_safe_to_sample()) { + return; + } + + _enable = true; + setup_jvmti_thread_state(); + _post_callback = JvmtiExport::post_sampled_object_alloc; + } +} + +JvmtiSampledObjectAllocEventCollector::~JvmtiSampledObjectAllocEventCollector() { + if (!_enable) { + return; + } + + generate_call_for_allocated(); + unset_jvmti_thread_state(); + + // Unset the sampling collector as present in assertion mode only. + assert(Thread::current()->is_Java_thread(), + "Should always be in a Java thread"); +} + JvmtiGCMarker::JvmtiGCMarker() { // if there aren't any JVMTI environments then nothing to do if (!JvmtiEnv::environments_might_exist()) { diff -r bf7e2684cd0a -r e2a7f431f65c src/hotspot/share/prims/jvmtiExport.hpp --- a/src/hotspot/share/prims/jvmtiExport.hpp Mon Jun 11 15:28:24 2018 +0200 +++ b/src/hotspot/share/prims/jvmtiExport.hpp Fri Jun 15 00:49:54 2018 -0700 @@ -123,6 +123,7 @@ // breakpoint info JVMTI_SUPPORT_FLAG(should_clean_up_heap_objects) JVMTI_SUPPORT_FLAG(should_post_vm_object_alloc) + JVMTI_SUPPORT_FLAG(should_post_sampled_object_alloc) // If flag cannot be implemented, give an error if on=true static void report_unsupported(bool on); @@ -363,6 +364,18 @@ record_vm_internal_object_allocation(object); } } + + static void record_sampled_internal_object_allocation(oop object) NOT_JVMTI_RETURN; + // Post objects collected by sampled_object_alloc_event_collector. + static void post_sampled_object_alloc(JavaThread *thread, oop object) NOT_JVMTI_RETURN; + + // Collects vm internal objects for later event posting. + inline static void sampled_object_alloc_event_collector(oop object) { + if (should_post_sampled_object_alloc()) { + record_sampled_internal_object_allocation(object); + } + } + inline static void post_array_size_exhausted() { if (should_post_resource_exhausted()) { post_resource_exhausted(JVMTI_RESOURCE_EXHAUSTED_OOM_ERROR, @@ -422,12 +435,16 @@ class JvmtiEventCollector : public StackObj { private: JvmtiEventCollector* _prev; // Save previous one to support nested event collector. + bool _unset_jvmti_thread_state; public: - void setup_jvmti_thread_state(); // Set this collector in current thread. + JvmtiEventCollector() : _prev(NULL), _unset_jvmti_thread_state(false) {} + + void setup_jvmti_thread_state(); // Set this collector in current thread, returns if success. void unset_jvmti_thread_state(); // Reset previous collector in current thread. 
virtual bool is_dynamic_code_event() { return false; } virtual bool is_vm_object_alloc_event(){ return false; } + virtual bool is_sampled_object_alloc_event(){ return false; } JvmtiEventCollector *get_prev() { return _prev; } }; @@ -462,42 +479,67 @@ }; -// Used to record vm internally allocated object oops and post -// vm object alloc event for objects visible to java world. -// Constructor enables JvmtiThreadState flag and all vm allocated -// objects are recorded in a growable array. When destructor is -// called the vm object alloc event is posted for each objects -// visible to java world. -// See jvm.cpp file for its usage. +// Used as a base class for object allocation collection and then posting +// the allocations to any event notification callbacks. // -class JvmtiVMObjectAllocEventCollector : public JvmtiEventCollector { - private: - GrowableArray* _allocated; // field to record vm internally allocated object oop. - bool _enable; // This flag is enabled in constructor and disabled - // in destructor before posting event. To avoid +class JvmtiObjectAllocEventCollector : public JvmtiEventCollector { + protected: + GrowableArray* _allocated; // field to record collected allocated object oop. + bool _enable; // This flag is enabled in constructor if set up in the thread state + // and disabled in destructor before posting event. To avoid // collection of objects allocated while running java code inside - // agent post_vm_object_alloc() event handler. + // agent post_X_object_alloc() event handler. + void (*_post_callback)(JavaThread*, oop); // what callback to use when destroying the collector. //GC support void oops_do(OopClosure* f); friend class JvmtiExport; - // Record vm allocated object oop. + + // Record allocated object oop. inline void record_allocation(oop obj); //GC support static void oops_do_for_all_threads(OopClosure* f); public: - JvmtiVMObjectAllocEventCollector() NOT_JVMTI_RETURN; - ~JvmtiVMObjectAllocEventCollector() NOT_JVMTI_RETURN; - bool is_vm_object_alloc_event() { return true; } + JvmtiObjectAllocEventCollector() NOT_JVMTI_RETURN; + + void generate_call_for_allocated(); bool is_enabled() { return _enable; } void set_enabled(bool on) { _enable = on; } }; +// Used to record vm internally allocated object oops and post +// vm object alloc event for objects visible to java world. +// Constructor enables JvmtiThreadState flag and all vm allocated +// objects are recorded in a growable array. When destructor is +// called the vm object alloc event is posted for each object +// visible to java world. +// See jvm.cpp file for its usage. +// +class JvmtiVMObjectAllocEventCollector : public JvmtiObjectAllocEventCollector { + public: + JvmtiVMObjectAllocEventCollector() NOT_JVMTI_RETURN; + ~JvmtiVMObjectAllocEventCollector() NOT_JVMTI_RETURN; + virtual bool is_vm_object_alloc_event() { return true; } +}; +// Used to record sampled allocated object oops and post +// sampled object alloc event. +// Constructor enables JvmtiThreadState flag and all sampled allocated +// objects are recorded in a growable array. When destructor is +// called the sampled object alloc event is posted for each sampled object. +// See jvm.cpp file for its usage. 
+// +class JvmtiSampledObjectAllocEventCollector : public JvmtiObjectAllocEventCollector { + public: + JvmtiSampledObjectAllocEventCollector() NOT_JVMTI_RETURN; + ~JvmtiSampledObjectAllocEventCollector() NOT_JVMTI_RETURN; + bool is_sampled_object_alloc_event() { return true; } + static bool object_alloc_is_safe_to_sample(); +}; // Marker class to disable the posting of VMObjectAlloc events // within its scope. diff -r bf7e2684cd0a -r e2a7f431f65c src/hotspot/share/prims/jvmtiManageCapabilities.cpp --- a/src/hotspot/share/prims/jvmtiManageCapabilities.cpp Mon Jun 11 15:28:24 2018 +0200 +++ b/src/hotspot/share/prims/jvmtiManageCapabilities.cpp Fri Jun 15 00:49:54 2018 -0700 @@ -130,6 +130,7 @@ memset(&jc, 0, sizeof(jc)); jc.can_suspend = 1; + jc.can_generate_sampled_object_alloc_events = 1; return jc; } @@ -410,6 +411,8 @@ log_trace(jvmti)("can_generate_frame_pop_events"); if (cap->can_generate_breakpoint_events) log_trace(jvmti)("can_generate_breakpoint_events"); + if (cap->can_generate_sampled_object_alloc_events) + log_trace(jvmti)("can_generate_sampled_object_alloc_events"); if (cap->can_suspend) log_trace(jvmti)("can_suspend"); if (cap->can_redefine_any_class ) diff -r bf7e2684cd0a -r e2a7f431f65c src/hotspot/share/prims/jvmtiThreadState.cpp --- a/src/hotspot/share/prims/jvmtiThreadState.cpp Mon Jun 11 15:28:24 2018 +0200 +++ b/src/hotspot/share/prims/jvmtiThreadState.cpp Fri Jun 15 00:49:54 2018 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -60,6 +60,7 @@ _head_env_thread_state = NULL; _dynamic_code_event_collector = NULL; _vm_object_alloc_event_collector = NULL; + _sampled_object_alloc_event_collector = NULL; _the_class_for_redefinition_verification = NULL; _scratch_class_for_redefinition_verification = NULL; _cur_stack_depth = UNKNOWN_STACK_DEPTH; diff -r bf7e2684cd0a -r e2a7f431f65c src/hotspot/share/prims/jvmtiThreadState.hpp --- a/src/hotspot/share/prims/jvmtiThreadState.hpp Mon Jun 11 15:28:24 2018 +0200 +++ b/src/hotspot/share/prims/jvmtiThreadState.hpp Fri Jun 15 00:49:54 2018 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2018 Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -113,6 +113,8 @@ JvmtiDynamicCodeEventCollector* _dynamic_code_event_collector; // holds the current vm object alloc event collector, NULL if no event collector in use JvmtiVMObjectAllocEventCollector* _vm_object_alloc_event_collector; + // holds the current sampled object alloc event collector, NULL if no event collector in use + JvmtiSampledObjectAllocEventCollector* _sampled_object_alloc_event_collector; // Should only be created by factory methods JvmtiThreadState(JavaThread *thread); @@ -314,12 +316,18 @@ JvmtiVMObjectAllocEventCollector* get_vm_object_alloc_event_collector() { return _vm_object_alloc_event_collector; } + JvmtiSampledObjectAllocEventCollector* get_sampled_object_alloc_event_collector() { + return _sampled_object_alloc_event_collector; + } void set_dynamic_code_event_collector(JvmtiDynamicCodeEventCollector* collector) { _dynamic_code_event_collector = collector; } void set_vm_object_alloc_event_collector(JvmtiVMObjectAllocEventCollector* collector) { _vm_object_alloc_event_collector = collector; } + void set_sampled_object_alloc_event_collector(JvmtiSampledObjectAllocEventCollector* collector) { + _sampled_object_alloc_event_collector = collector; + } // diff -r bf7e2684cd0a -r e2a7f431f65c src/hotspot/share/runtime/mutexLocker.cpp --- a/src/hotspot/share/runtime/mutexLocker.cpp Mon Jun 11 15:28:24 2018 +0200 +++ b/src/hotspot/share/runtime/mutexLocker.cpp Fri Jun 15 00:49:54 2018 -0700 @@ -131,6 +131,8 @@ Monitor* PeriodicTask_lock = NULL; Monitor* RedefineClasses_lock = NULL; +Mutex* ThreadHeapSampler_lock = NULL; + #if INCLUDE_JFR Mutex* JfrStacktrace_lock = NULL; Monitor* JfrMsg_lock = NULL; @@ -296,6 +298,9 @@ def(CompileThread_lock , PaddedMonitor, nonleaf+5, false, Monitor::_safepoint_check_always); def(PeriodicTask_lock , PaddedMonitor, nonleaf+5, true, Monitor::_safepoint_check_sometimes); def(RedefineClasses_lock , PaddedMonitor, nonleaf+5, true, Monitor::_safepoint_check_always); + + def(ThreadHeapSampler_lock , PaddedMutex, nonleaf, false, Monitor::_safepoint_check_never); + if (WhiteBoxAPI) { def(Compilation_lock , PaddedMonitor, leaf, false, Monitor::_safepoint_check_never); } diff -r bf7e2684cd0a -r e2a7f431f65c src/hotspot/share/runtime/mutexLocker.hpp --- a/src/hotspot/share/runtime/mutexLocker.hpp Mon Jun 11 15:28:24 2018 +0200 +++ b/src/hotspot/share/runtime/mutexLocker.hpp Fri Jun 15 00:49:54 2018 -0700 @@ -130,6 +130,7 @@ extern Monitor* Service_lock; // a lock used for service thread operation extern Monitor* PeriodicTask_lock; // protects the periodic task structure extern Monitor* RedefineClasses_lock; // locks classes from parallel redefinition +extern Mutex* ThreadHeapSampler_lock; // protects the static data for initialization. 
#if INCLUDE_JFR extern Mutex* JfrStacktrace_lock; // used to guard access to the JFR stacktrace table diff -r bf7e2684cd0a -r e2a7f431f65c src/hotspot/share/runtime/thread.hpp --- a/src/hotspot/share/runtime/thread.hpp Mon Jun 11 15:28:24 2018 +0200 +++ b/src/hotspot/share/runtime/thread.hpp Fri Jun 15 00:49:54 2018 -0700 @@ -42,6 +42,7 @@ #include "runtime/park.hpp" #include "runtime/safepoint.hpp" #include "runtime/stubRoutines.hpp" +#include "runtime/threadHeapSampler.hpp" #include "runtime/threadLocalStorage.hpp" #include "runtime/unhandledOops.hpp" #include "utilities/align.hpp" @@ -338,6 +339,7 @@ ThreadLocalAllocBuffer _tlab; // Thread-local eden jlong _allocated_bytes; // Cumulative number of bytes allocated on // the Java heap + ThreadHeapSampler _heap_sampler; // For use when sampling the memory. JFR_ONLY(DEFINE_THREAD_LOCAL_FIELD_JFR;) // Thread-local data for jfr @@ -517,6 +519,8 @@ void incr_allocated_bytes(jlong size) { _allocated_bytes += size; } inline jlong cooked_allocated_bytes(); + ThreadHeapSampler& heap_sampler() { return _heap_sampler; } + JFR_ONLY(DEFINE_THREAD_LOCAL_ACCESSOR_JFR;) bool is_trace_suspend() { return (_suspend_flags & _trace_flag) != 0; }
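Putting the pieces of this change together, an agent would request the new capability, register the SampledObjectAlloc callback, enable the event, and optionally adjust the rate. The sketch below is illustrative wiring, not part of the patch; error handling is reduced to a minimum.

#include <cstring>
#include <jvmti.h>

static void JNICALL OnSampledObjectAlloc(jvmtiEnv* jvmti, JNIEnv* jni,
                                         jthread thread, jobject object,
                                         jclass object_klass, jlong size) {
  // Record or tag the sampled allocation here (see the handler sketch above).
}

JNIEXPORT jint JNICALL Agent_OnLoad(JavaVM* vm, char* options, void* reserved) {
  jvmtiEnv* jvmti = NULL;
  if (vm->GetEnv(reinterpret_cast<void**>(&jvmti), JVMTI_VERSION) != JNI_OK) {
    return JNI_ERR;
  }

  jvmtiCapabilities caps;
  memset(&caps, 0, sizeof(caps));
  caps.can_generate_sampled_object_alloc_events = 1;
  if (jvmti->AddCapabilities(&caps) != JVMTI_ERROR_NONE) {
    return JNI_ERR;
  }

  jvmtiEventCallbacks callbacks;
  memset(&callbacks, 0, sizeof(callbacks));
  callbacks.SampledObjectAlloc = &OnSampledObjectAlloc;
  jvmti->SetEventCallbacks(&callbacks, sizeof(callbacks));

  jvmti->SetHeapSamplingRate(1024 * 1024);  // sample about once per MB allocated per thread
  jvmti->SetEventNotificationMode(JVMTI_ENABLE, JVMTI_EVENT_SAMPLED_OBJECT_ALLOC, NULL);
  return JNI_OK;
}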