50579
|
1 |
/*
|
|
2 |
* Copyright (c) 2018, Google and/or its affiliates. All rights reserved.
|
|
3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
4 |
*
|
|
5 |
* This code is free software; you can redistribute it and/or modify it
|
|
6 |
* under the terms of the GNU General Public License version 2 only, as
|
|
7 |
* published by the Free Software Foundation.
|
|
8 |
*
|
|
9 |
* This code is distributed in the hope that it will be useful, but WITHOUT
|
|
10 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
11 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
12 |
* version 2 for more details (a copy is included in the LICENSE file that
|
|
13 |
* accompanied this code).
|
|
14 |
*
|
|
15 |
* You should have received a copy of the GNU General Public License version
|
|
16 |
* 2 along with this work; if not, write to the Free Software Foundation,
|
|
17 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
18 |
*
|
|
19 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
20 |
* or visit www.oracle.com if you need additional information or have any
|
|
21 |
* questions.
|
|
22 |
*
|
|
23 |
*/
|
|
24 |
|
|
25 |
#include "precompiled.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/orderAccess.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/threadHeapSampler.hpp"

#include <string.h>
|
|
30 |
|
|
31 |
// Cheap random number generator
|
|
32 |
uint64_t ThreadHeapSampler::_rnd;
|
|
33 |
// Default is 512kb.
|
|
34 |
int ThreadHeapSampler::_sampling_rate = 512 * 1024;
|
|
35 |
int ThreadHeapSampler::_enabled;
|
|
36 |
|
|
37 |
// Lookup-table state used by fast_log2().

// Number of leading mantissa bits used to index the table.
static const int FastLogNumBits = 10;
// Keeps the low FastLogNumBits bits of a value.
static const int FastLogMask = (1 << FastLogNumBits) - 1;
// One entry per FastLogNumBits-bit index. Filled by
// ThreadHeapSampler::init_log_table() and treated as constant after that.
static double log_table[FastLogMask + 1];
// Becomes true once log_table has been filled.
static bool log_table_initialized = false;
|
|
42 |
|
|
43 |
// Advances the pseudo-random sequence by one step and returns the new value.
//
// This is a linear congruential generator, X' = (a * X + b) mod c, with
//   a = 0x5DEECE66D, b = 0xB, c = 1 << 48
// i.e. the parameters of the lrand48 generator.
//
// The multiplication and addition are done in uint64_t and may wrap modulo
// 2^64. That is harmless: only the low 48 bits are kept, and those are not
// affected by wrap-around in the upper bits.
static uint64_t next_random(uint64_t rnd) {
  const uint64_t multiplier = 0x5DEECE66DLL;
  const uint64_t increment = 0xB;
  const uint64_t modulus_bits = 48;
  const uint64_t low_bits_mask = (static_cast<uint64_t>(1) << modulus_bits) - 1;

  const uint64_t advanced = multiplier * rnd + increment;
  return advanced & low_bits_mask;
}
|
|
55 |
|
|
56 |
static double fast_log2(const double & d) {
|
|
57 |
assert(d>0, "bad value passed to assert");
|
|
58 |
uint64_t x = 0;
|
|
59 |
assert(sizeof(d) == sizeof(x),
|
|
60 |
"double and uint64_t do not have the same size");
|
|
61 |
x = *reinterpret_cast<const uint64_t*>(&d);
|
|
62 |
const uint32_t x_high = x >> 32;
|
|
63 |
assert(FastLogNumBits <= 20, "FastLogNumBits should be less than 20.");
|
|
64 |
const uint32_t y = x_high >> (20 - FastLogNumBits) & FastLogMask;
|
|
65 |
const int32_t exponent = ((x_high >> 20) & 0x7FF) - 1023;
|
|
66 |
return exponent + log_table[y];
|
|
67 |
}
|
|
68 |
|
|
69 |
// Generates a geometric variable with the specified mean (512K by default).
|
|
70 |
// This is done by generating a random number between 0 and 1 and applying
|
|
71 |
// the inverse cumulative distribution function for an exponential.
|
|
72 |
// Specifically: Let m be the inverse of the sample rate, then
|
|
73 |
// the probability distribution function is m*exp(-mx) so the CDF is
|
|
74 |
// p = 1 - exp(-mx), so
|
|
75 |
// q = 1 - p = exp(-mx)
|
|
76 |
// log_e(q) = -mx
|
|
77 |
// -log_e(q)/m = x
|
|
78 |
// log_2(q) * (-log_e(2) * 1/m) = x
|
|
79 |
// In the code, q is actually in the range 1 to 2**26, hence the -26 below
|
|
80 |
void ThreadHeapSampler::pick_next_geometric_sample() {
|
|
81 |
_rnd = next_random(_rnd);
|
|
82 |
// Take the top 26 bits as the random number
|
|
83 |
// (This plus a 1<<58 sampling bound gives a max possible step of
|
|
84 |
// 5194297183973780480 bytes. In this case,
|
|
85 |
// for sample_parameter = 1<<19, max possible step is
|
|
86 |
// 9448372 bytes (24 bits).
|
|
87 |
const uint64_t PrngModPower = 48; // Number of bits in prng
|
|
88 |
// The uint32_t cast is to prevent a (hard-to-reproduce) NAN
|
|
89 |
// under piii debug for some binaries.
|
|
90 |
double q = static_cast<uint32_t>(_rnd >> (PrngModPower - 26)) + 1.0;
|
|
91 |
// Put the computed p-value through the CDF of a geometric.
|
|
92 |
// For faster performance (save ~1/20th exec time), replace
|
|
93 |
// min(0.0, FastLog2(q) - 26) by (Fastlog2(q) - 26.000705)
|
|
94 |
// The value 26.000705 is used rather than 26 to compensate
|
|
95 |
// for inaccuracies in FastLog2 which otherwise result in a
|
|
96 |
// negative answer.
|
|
97 |
double log_val = (fast_log2(q) - 26);
|
|
98 |
double result =
|
|
99 |
(0.0 < log_val ? 0.0 : log_val) * (-log(2.0) * (get_sampling_rate())) + 1;
|
|
100 |
assert(result > 0 && result < SIZE_MAX, "Result is not in an acceptable range.");
|
|
101 |
size_t rate = static_cast<size_t>(result);
|
|
102 |
_bytes_until_sample = rate;
|
|
103 |
}
|
|
104 |
|
|
105 |
void ThreadHeapSampler::pick_next_sample(size_t overflowed_bytes) {
|
|
106 |
if (get_sampling_rate() == 1) {
|
|
107 |
_bytes_until_sample = 1;
|
|
108 |
return;
|
|
109 |
}
|
|
110 |
|
|
111 |
pick_next_geometric_sample();
|
|
112 |
|
|
113 |
// Try to correct sample size by removing extra space from last allocation.
|
|
114 |
if (overflowed_bytes > 0 && _bytes_until_sample > overflowed_bytes) {
|
|
115 |
_bytes_until_sample -= overflowed_bytes;
|
|
116 |
}
|
|
117 |
}
|
|
118 |
|
|
119 |
// Accounts for a new allocation and reports it as a sample once enough bytes
// have been allocated.
//
// ptr:                    start of the newly allocated object.
// allocation_size:        size of this allocation.
// bytes_since_allocation: bytes allocated since the previous call that have
//                         not been accounted for yet.
//
// While the accumulated total stays below _bytes_until_sample it is only
// subtracted from the counter. Otherwise the object is handed to
// JvmtiExport::sampled_object_alloc_event_collector() and a new interval is
// picked, passing along the amount by which the sampling point was overshot.
void ThreadHeapSampler::check_for_sampling(HeapWord* ptr, size_t allocation_size, size_t bytes_since_allocation) {
  const size_t allocated = bytes_since_allocation + allocation_size;

  if (allocated >= _bytes_until_sample) {
    // Time for a sample: report the object first, then re-arm the counter.
    oopDesc* sampled_obj = reinterpret_cast<oopDesc*>(ptr);
    JvmtiExport::sampled_object_alloc_event_collector(sampled_obj);

    const size_t overshoot = allocated - _bytes_until_sample;
    pick_next_sample(overshoot);
    return;
  }

  // Not yet time for a sample: just consume the allocated bytes.
  _bytes_until_sample -= allocated;
}
|
|
134 |
|
|
135 |
void ThreadHeapSampler::init_log_table() {
|
|
136 |
MutexLockerEx mu(ThreadHeapSampler_lock, Mutex::_no_safepoint_check_flag);
|
|
137 |
|
|
138 |
if (log_table_initialized) {
|
|
139 |
return;
|
|
140 |
}
|
|
141 |
|
|
142 |
for (int i = 0; i < (1 << FastLogNumBits); i++) {
|
|
143 |
log_table[i] = (log(1.0 + static_cast<double>(i+0.5) / (1 << FastLogNumBits))
|
|
144 |
/ log(2.0));
|
|
145 |
}
|
|
146 |
|
|
147 |
log_table_initialized = true;
|
|
148 |
}
|
|
149 |
|
|
150 |
// Switches sampling on.
//
// The fast-log table is initialized here, before the flag is published, so
// that it is done once things have settled. This costs a mutex lock but,
// presumably, users won't be enabling and disabling all the time.
void ThreadHeapSampler::enable() {
  init_log_table();

  const int on = 1;
  OrderAccess::release_store(&_enabled, on);
}
|
|
156 |
|
|
157 |
int ThreadHeapSampler::enabled() {
|
|
158 |
return OrderAccess::load_acquire(&_enabled);
|
|
159 |
}
|
|
160 |
|
|
161 |
// Switches sampling off by storing 0 into the enabled flag with release
// semantics.
void ThreadHeapSampler::disable() {
  const int off = 0;
  OrderAccess::release_store(&_enabled, off);
}
|
|
164 |
|
|
165 |
int ThreadHeapSampler::get_sampling_rate() {
|
|
166 |
return OrderAccess::load_acquire(&_sampling_rate);
|
|
167 |
}
|
|
168 |
|
|
169 |
// Replaces the sampling rate with sampling_rate, stored with release
// semantics. The value is stored as given; no validation is done here.
void ThreadHeapSampler::set_sampling_rate(int sampling_rate) {
  OrderAccess::release_store(
      &_sampling_rate,
      sampling_rate);
}
|
|
172 |
|
|
173 |
// Methods used in assertion mode to check if a collector is present or not at
|
|
174 |
// the moment of TLAB sampling, i.e. a slow allocation path.
|
|
175 |
bool ThreadHeapSampler::sampling_collector_present() const {
|
|
176 |
return _collectors_present > 0;
|
|
177 |
}
|
|
178 |
|
|
179 |
// Unregisters one sampling collector by decrementing the collector counter.
// Asserts that at least one collector was registered. Always returns true
// (presumably so the call can be placed inside an assert - confirm against
// callers).
bool ThreadHeapSampler::remove_sampling_collector() {
  assert(_collectors_present > 0, "Problem with collector counter.");
  --_collectors_present;
  return true;
}
|
|
184 |
|
|
185 |
// Registers one sampling collector by incrementing the collector counter.
// Always returns true (presumably so the call can be placed inside an
// assert - confirm against callers).
bool ThreadHeapSampler::add_sampling_collector() {
  ++_collectors_present;
  return true;
}
|