--- a/hotspot/src/share/vm/oops/methodKlass.cpp Fri Mar 04 20:01:48 2011 -0800
+++ b/hotspot/src/share/vm/oops/methodKlass.cpp Fri Mar 04 22:44:50 2011 -0800
@@ -103,6 +103,12 @@
m->backedge_counter()->init();
m->clear_number_of_breakpoints();
+#ifdef TIERED
+ m->set_rate(0);
+ m->set_prev_event_count(0);
+ m->set_prev_time(0);
+#endif
+
assert(m->is_parsable(), "must be parsable here.");
assert(m->size() == size, "wrong size for object");
// We should not publish an uprasable object's reference
--- a/hotspot/src/share/vm/oops/methodOop.hpp Fri Mar 04 20:01:48 2011 -0800
+++ b/hotspot/src/share/vm/oops/methodOop.hpp Fri Mar 04 22:44:50 2011 -0800
@@ -84,6 +84,11 @@
// | invocation_counter |
// | backedge_counter |
// |------------------------------------------------------|
+// | prev_time (tiered only, 64 bit wide) |
+// | |
+// |------------------------------------------------------|
+// | rate (tiered) |
+// |------------------------------------------------------|
// | code (pointer) |
// | i2i (pointer) |
// | adapter (pointer) |
@@ -124,6 +129,11 @@
InvocationCounter _invocation_counter; // Incremented before each activation of the method - used to trigger frequency-based optimizations
InvocationCounter _backedge_counter; // Incremented before each backedge taken - used to trigger frequencey-based optimizations
+#ifdef TIERED
+ jlong _prev_time; // Previous time the rate was acquired
+ float _rate; // Events (invocation and backedge counter increments) per millisecond
+#endif
+
#ifndef PRODUCT
int _compiled_invocation_count; // Number of nmethod invocations so far (for perf. debugging)
#endif
@@ -304,6 +314,17 @@
InvocationCounter* invocation_counter() { return &_invocation_counter; }
InvocationCounter* backedge_counter() { return &_backedge_counter; }
+#ifdef TIERED
+ // Tiered-only rate bookkeeping. We are reusing interpreter_invocation_count as a holder for the previous event count!
+ // We can do that since interpreter_invocation_count is not used in tiered.
+ int prev_event_count() const { return _interpreter_invocation_count; } // Event count at the last rate measurement
+ void set_prev_event_count(int count) { _interpreter_invocation_count = count; }
+ jlong prev_time() const { return _prev_time; } // Time of the last rate measurement
+ void set_prev_time(jlong time) { _prev_time = time; }
+ float rate() const { return _rate; } // Events per millisecond
+ void set_rate(float rate) { _rate = rate; }
+#endif
+
int invocation_count();
int backedge_count();
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/runtime/advancedThresholdPolicy.cpp Fri Mar 04 22:44:50 2011 -0800
@@ -0,0 +1,450 @@
+/*
+* Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved.
+* ORACLE PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
+*/
+
+#include "precompiled.hpp"
+#include "runtime/advancedThresholdPolicy.hpp"
+#include "runtime/simpleThresholdPolicy.inline.hpp"
+
+#ifdef TIERED
+// Print the policy-specific part of an event: the event rate of mh and the current threshold scaling coefficients. type, imh, bci and level are not used here.
+void AdvancedThresholdPolicy::print_specific(EventType type, methodHandle mh, methodHandle imh,
+ int bci, CompLevel level) {
+ tty->print(" rate: ");
+ if (mh->prev_time() == 0) tty->print("n/a"); // prev_time() == 0: update_rate() has not recorded a measurement yet
+ else tty->print("%f", mh->rate());
+
+ tty->print(" k: %.2lf,%.2lf", threshold_scale(CompLevel_full_profile, Tier3LoadFeedback),
+ threshold_scale(CompLevel_full_optimization, Tier4LoadFeedback)); // Scaling coefficients for the level 3 and level 4 thresholds
+
+}
+
+void AdvancedThresholdPolicy::initialize() {
+ // Turn on ergonomic compiler count selection, unless either CICompilerCountPerCPU or CICompilerCount was set explicitly
+ if (FLAG_IS_DEFAULT(CICompilerCountPerCPU) && FLAG_IS_DEFAULT(CICompilerCount)) {
+ FLAG_SET_DEFAULT(CICompilerCountPerCPU, true);
+ }
+ int count = CICompilerCount;
+ if (CICompilerCountPerCPU) {
+ // Simple log n seems to grow too slowly for tiered, try something faster: log n * log log n (at least 1), scaled by 3/2
+ int log_cpu = log2_intptr(os::active_processor_count());
+ int loglog_cpu = log2_intptr(MAX2(log_cpu, 1));
+ count = MAX2(log_cpu * loglog_cpu, 1) * 3 / 2;
+ }
+
+ set_c1_count(MAX2(count / 3, 1)); // A third of the count (but at least 1) goes to C1 ...
+ set_c2_count(MAX2(count - count / 3, 1)); // ... and the remainder (but at least 1) goes to C2.
+
+ // Some inlining tuning: per-platform InlineSmallCode defaults, applied only if the flag was not set explicitly
+#ifdef X86
+ if (FLAG_IS_DEFAULT(InlineSmallCode)) {
+ FLAG_SET_DEFAULT(InlineSmallCode, 2000);
+ }
+#endif
+
+#ifdef SPARC
+ if (FLAG_IS_DEFAULT(InlineSmallCode)) {
+ FLAG_SET_DEFAULT(InlineSmallCode, 2500);
+ }
+#endif
+
+
+ set_start_time(os::javaTimeMillis()); // Reference time for the first rate measurement of a method (see update_rate())
+}
+
+// Recompute the event rate of m at time t (ms). Called from select_task() while holding a compile queue lock, and from submit_compile().
+void AdvancedThresholdPolicy::update_rate(jlong t, methodOop m) {
+ if (is_old(m)) {
+ // We don't remove old methods from the queue,
+ // so we can just zero the rate.
+ m->set_rate(0);
+ return;
+ }
+
+ // We don't update the rate if we've just come out of a safepoint.
+ // delta_s is the time since last safepoint in milliseconds.
+ jlong delta_s = t - SafepointSynchronize::end_of_last_safepoint();
+ jlong delta_t = t - (m->prev_time() != 0 ? m->prev_time() : start_time()); // milliseconds since the last measurement (or since policy start if there was none)
+ // How many events were there since the last time?
+ int event_count = m->invocation_count() + m->backedge_count();
+ int delta_e = event_count - m->prev_event_count();
+
+ // We should have been running for at least TieredRateUpdateMinTime (nominally 1ms) since the last safepoint.
+ if (delta_s >= TieredRateUpdateMinTime) {
+ // And we must've taken the previous point at least TieredRateUpdateMinTime before, with some events since then.
+ if (delta_t >= TieredRateUpdateMinTime && delta_e > 0) {
+ m->set_prev_time(t);
+ m->set_prev_event_count(event_count);
+ m->set_rate((float)delta_e / (float)delta_t); // Rate is events per millisecond
+ } else
+ if (delta_t > TieredRateUpdateMaxTime && delta_e == 0) {
+ // If nothing happened for more than TieredRateUpdateMaxTime (nominally 25ms), zero the rate. Don't modify prev values.
+ m->set_rate(0);
+ }
+ }
+}
+
+// Check if this method has been stale for a given number of milliseconds, i.e. it saw no events for longer than timeout.
+// t is the current time in milliseconds. See select_task().
+bool AdvancedThresholdPolicy::is_stale(jlong t, jlong timeout, methodOop m) {
+ jlong delta_s = t - SafepointSynchronize::end_of_last_safepoint(); // Time since the last safepoint
+ jlong delta_t = t - m->prev_time(); // Time since the last rate measurement
+ if (delta_t > timeout && delta_s > timeout) {
+ int event_count = m->invocation_count() + m->backedge_count();
+ int delta_e = event_count - m->prev_event_count();
+ // Return true if there were no events since the last measurement.
+ return delta_e == 0;
+ }
+ return false;
+}
+
+// A method is "old" once it has been invoked more than 50000 times or has taken more than 500000 backedges.
+// We don't remove old methods from the compile queue even if they have very low activity. See select_task().
+bool AdvancedThresholdPolicy::is_old(methodOop method) {
+ return method->invocation_count() > 50000 || method->backedge_count() > 500000;
+}
+
+double AdvancedThresholdPolicy::weight(methodOop method) { // Scheduling weight used by compare_methods(); larger means compile sooner.
+ return (double)(method->rate() + 1) * (method->invocation_count() + 1) * (method->backedge_count() + 1); // Multiply in double: the int product of the two counters can overflow. The +1 terms keep a zero factor from nullifying the weight.
+}
+
+// Apply heuristics and return true if x should be compiled before y: a higher highest_comp_level() wins, ties are broken by weight().
+bool AdvancedThresholdPolicy::compare_methods(methodOop x, methodOop y) {
+ if (x->highest_comp_level() > y->highest_comp_level()) {
+ // recompilation after deopt
+ return true;
+ } else
+ if (x->highest_comp_level() == y->highest_comp_level()) {
+ if (weight(x) > weight(y)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+// Is method profiled enough? True if the MDO counter deltas satisfy the level 3 call predicate with a scale of 1; false if there is no MDO.
+bool AdvancedThresholdPolicy::is_method_profiled(methodOop method) {
+ methodDataOop mdo = method->method_data();
+ if (mdo != NULL) {
+ int i = mdo->invocation_count_delta();
+ int b = mdo->backedge_count_delta();
+ return call_predicate_helper<CompLevel_full_profile>(i, b, 1);
+ }
+ return false;
+}
+
+// Called with the queue locked. Returns the task whose method compare_methods() ranks highest, removing stale tasks on the way; returns NULL if the queue is empty.
+CompileTask* AdvancedThresholdPolicy::select_task(CompileQueue* compile_queue) {
+ CompileTask *max_task = NULL;
+ methodOop max_method = NULL; // Always set together with max_task; initialized so it is never read while indeterminate.
+ jlong t = os::javaTimeMillis();
+ // Iterate through the queue and find the method that compare_methods() ranks highest.
+ for (CompileTask* task = compile_queue->first(); task != NULL;) {
+ CompileTask* next_task = task->next();
+ methodOop method = (methodOop)JNIHandles::resolve(task->method_handle());
+ // Refresh the rate bookkeeping first: the staleness check and compare_methods() below read the values it maintains.
+ update_rate(t, method);
+ if (max_task == NULL) {
+ max_task = task;
+ max_method = method;
+ } else {
+ // If a method has been stale for some time, remove it from the queue. The first task is only a candidate and is never removed here.
+ if (is_stale(t, TieredCompileTaskTimeout, method) && !is_old(method)) {
+ if (PrintTieredEvents) {
+ print_event(KILL, method, method, task->osr_bci(), (CompLevel)task->comp_level());
+ }
+ CompileTaskWrapper ctw(task); // Frees the task
+ compile_queue->remove(task);
+ method->clear_queued_for_compilation();
+ task = next_task;
+ continue;
+ }
+
+ // Select a method with a higher priority
+ if (compare_methods(method, max_method)) {
+ max_task = task;
+ max_method = method;
+ }
+ }
+ task = next_task;
+ }
+
+ if (max_task != NULL && max_task->comp_level() == CompLevel_full_profile && is_method_profiled(max_method)) { // Already fully profiled: level 2 is enough (pattern 0 -> (3->2) -> 4). The NULL check covers an empty queue.
+ max_task->set_comp_level(CompLevel_limited_profile);
+ if (PrintTieredEvents) {
+ print_event(UPDATE, max_method, max_method, max_task->osr_bci(), (CompLevel)max_task->comp_level());
+ }
+ }
+
+ return max_task;
+}
+
+double AdvancedThresholdPolicy::threshold_scale(CompLevel level, int feedback_k) {
+ double queue_size = CompileBroker::queue_size(level); // Kept as double so the division below is not truncated
+ int comp_count = compiler_count(level);
+ double k = queue_size / (feedback_k * comp_count) + 1; // 1 for an empty queue; grows by 1 for every feedback_k queued methods per compiler thread
+ return k;
+}
+
+// Call and loop predicates determine whether a transition to a higher
+// compilation level should be performed (pointers to predicate functions
+// are passed to common()).
+// Tier?LoadFeedback is basically a coefficient that determines
+// how many methods per compiler thread can be in the queue before
+// the threshold values double.
+bool AdvancedThresholdPolicy::loop_predicate(int i, int b, CompLevel cur_level) {
+ switch(cur_level) {
+ case CompLevel_none:
+ case CompLevel_limited_profile: {
+ double k = threshold_scale(CompLevel_full_profile, Tier3LoadFeedback); // Scaled by the load on the level 3 queue
+ return loop_predicate_helper<CompLevel_none>(i, b, k);
+ }
+ case CompLevel_full_profile: {
+ double k = threshold_scale(CompLevel_full_optimization, Tier4LoadFeedback); // Scaled by the load on the level 4 queue
+ return loop_predicate_helper<CompLevel_full_profile>(i, b, k);
+ }
+ default:
+ return true;
+ }
+}
+
+bool AdvancedThresholdPolicy::call_predicate(int i, int b, CompLevel cur_level) { // Invocation-entry counterpart of loop_predicate(); i = invocations, b = backedges
+ switch(cur_level) {
+ case CompLevel_none:
+ case CompLevel_limited_profile: {
+ double k = threshold_scale(CompLevel_full_profile, Tier3LoadFeedback); // Scaled by the load on the level 3 queue
+ return call_predicate_helper<CompLevel_none>(i, b, k);
+ }
+ case CompLevel_full_profile: {
+ double k = threshold_scale(CompLevel_full_optimization, Tier4LoadFeedback); // Scaled by the load on the level 4 queue
+ return call_predicate_helper<CompLevel_full_profile>(i, b, k);
+ }
+ default:
+ return true;
+ }
+}
+
+// If a method is old enough and is still in the interpreter we would want to
+// start profiling without waiting for the compiled method to arrive.
+// We also take the load on compilers into account: no early profiling while the C2 queue exceeds the Tier3DelayOn limit.
+bool AdvancedThresholdPolicy::should_create_mdo(methodOop method, CompLevel cur_level) {
+ if (cur_level == CompLevel_none &&
+ CompileBroker::queue_size(CompLevel_full_optimization) <=
+ Tier3DelayOn * compiler_count(CompLevel_full_optimization)) {
+ int i = method->invocation_count();
+ int b = method->backedge_count();
+ double k = Tier0ProfilingStartPercentage / 100.0; // The percentage expressed as a threshold scale
+ return call_predicate_helper<CompLevel_none>(i, b, k) || loop_predicate_helper<CompLevel_none>(i, b, k);
+ }
+ return false;
+}
+
+// Create MDO if necessary. Native, abstract and accessor methods are skipped; an exception raised while building the MDO is cleared, not propagated.
+void AdvancedThresholdPolicy::create_mdo(methodHandle mh, TRAPS) {
+ if (mh->is_native() || mh->is_abstract() || mh->is_accessor()) return;
+ if (mh->method_data() == NULL) {
+ methodOopDesc::build_interpreter_method_data(mh, THREAD);
+ if (HAS_PENDING_EXCEPTION) {
+ CLEAR_PENDING_EXCEPTION;
+ }
+ }
+}
+
+
+/*
+ * Method states:
+ * 0 - interpreter (CompLevel_none)
+ * 1 - pure C1 (CompLevel_simple)
+ * 2 - C1 with invocation and backedge counting (CompLevel_limited_profile)
+ * 3 - C1 with full profiling (CompLevel_full_profile)
+ * 4 - C2 (CompLevel_full_optimization)
+ *
+ * Common state transition patterns:
+ * a. 0 -> 3 -> 4.
+ * The most common path. But note that even in this straightforward case
+ * profiling can start at level 0 and finish at level 3.
+ *
+ * b. 0 -> 2 -> 3 -> 4.
+ * This case occurs when the load on C2 is deemed too high. So, instead of transitioning
+ * into state 3 directly and over-profiling while a method is in the C2 queue we transition to
+ * level 2 and wait until the load on C2 decreases. This path is disabled for OSRs.
+ *
+ * c. 0 -> (3->2) -> 4.
+ * In this case we enqueue a method for compilation at level 3, but the C1 queue is long enough
+ * to enable the profiling to fully occur at level 0. In this case we change the compilation level
+ * of the method to 2, because it'll allow it to run much faster without full profiling while c2
+ * is compiling.
+ *
+ * d. 0 -> 3 -> 1 or 0 -> 2 -> 1.
+ * After a method was once compiled with C1 it can be identified as trivial and be compiled to
+ * level 1. These transitions can also occur if a method can't be compiled with C2 but can with C1.
+ *
+ * e. 0 -> 4.
+ * This can happen if a method fails C1 compilation (it will still be profiled in the interpreter)
+ * or because of a deopt that didn't require reprofiling (compilation won't happen in this case because
+ * the compiled version already exists).
+ *
+ * Note that since state 0 can be reached from any other state via deoptimization different loops
+ * are possible.
+ *
+ */
+
+// Common transition function. Given a predicate p (call_predicate or loop_predicate), determines the level the method should move to from cur_level; returns cur_level if no transition applies.
+CompLevel AdvancedThresholdPolicy::common(Predicate p, methodOop method, CompLevel cur_level) {
+ if (is_trivial(method)) return CompLevel_simple; // Trivial methods go to pure C1 regardless of cur_level
+
+ CompLevel next_level = cur_level;
+ int i = method->invocation_count();
+ int b = method->backedge_count();
+
+ switch(cur_level) {
+ case CompLevel_none:
+ // If we were at full profile level, would we switch to full opt?
+ if (common(p, method, CompLevel_full_profile) == CompLevel_full_optimization) {
+ next_level = CompLevel_full_optimization;
+ } else if ((this->*p)(i, b, cur_level)) {
+ // C1-generated fully profiled code is about 30% slower than the limited profile
+ // code that has only invocation and backedge counters. The observation is that
+ // if C2 queue is large enough we can spend too much time in the fully profiled code
+ // while waiting for C2 to pick the method from the queue. To alleviate this problem
+ // we introduce a feedback on the C2 queue size. If the C2 queue is sufficiently long
+ // we choose to compile a limited profiled version and then recompile with full profiling
+ // when the load on C2 goes down.
+ if (CompileBroker::queue_size(CompLevel_full_optimization) >
+ Tier3DelayOn * compiler_count(CompLevel_full_optimization)) {
+ next_level = CompLevel_limited_profile;
+ } else {
+ next_level = CompLevel_full_profile;
+ }
+ }
+ break;
+ case CompLevel_limited_profile:
+ if (is_method_profiled(method)) {
+ // Special case: we got here because this method was fully profiled in the interpreter.
+ next_level = CompLevel_full_optimization;
+ } else {
+ methodDataOop mdo = method->method_data();
+ if (mdo != NULL) {
+ if (mdo->would_profile()) {
+ if (CompileBroker::queue_size(CompLevel_full_optimization) <=
+ Tier3DelayOff * compiler_count(CompLevel_full_optimization) &&
+ (this->*p)(i, b, cur_level)) {
+ next_level = CompLevel_full_profile; // C2 load is back under the Tier3DelayOff limit: do the delayed 2->3 transition
+ }
+ } else {
+ next_level = CompLevel_full_optimization; // would_profile() is false: skip level 3
+ }
+ }
+ }
+ break;
+ case CompLevel_full_profile:
+ {
+ methodDataOop mdo = method->method_data();
+ if (mdo != NULL) {
+ if (mdo->would_profile()) {
+ int mdo_i = mdo->invocation_count_delta();
+ int mdo_b = mdo->backedge_count_delta();
+ if ((this->*p)(mdo_i, mdo_b, cur_level)) { // At level 3 the predicate is fed the MDO counter deltas, not the methodOop counters
+ next_level = CompLevel_full_optimization;
+ }
+ } else {
+ next_level = CompLevel_full_optimization; // would_profile() is false: go straight to C2
+ }
+ }
+ }
+ break;
+ }
+ return next_level;
+}
+
+// Determine if a method should be compiled with a normal entry point at a different level. Returns the target level (cur_level means no change).
+CompLevel AdvancedThresholdPolicy::call_event(methodOop method, CompLevel cur_level) {
+ CompLevel osr_level = (CompLevel) method->highest_osr_comp_level();
+ CompLevel next_level = common(&AdvancedThresholdPolicy::call_predicate, method, cur_level);
+
+ // If OSR method level is greater than the regular method level, the levels should be
+ // equalized by raising the regular method level in order to avoid OSRs during each
+ // invocation of the method.
+ if (osr_level == CompLevel_full_optimization && cur_level == CompLevel_full_profile) {
+ methodDataOop mdo = method->method_data();
+ guarantee(mdo != NULL, "MDO should not be NULL");
+ if (mdo->invocation_count() >= 1) { // Raise to level 4 only once the MDO has recorded at least one invocation
+ next_level = CompLevel_full_optimization;
+ }
+ } else {
+ next_level = MAX2(osr_level, next_level);
+ }
+
+ return next_level;
+}
+
+// Determine if we should do an OSR compilation of a given method. Returns the target OSR level (cur_level means no change).
+CompLevel AdvancedThresholdPolicy::loop_event(methodOop method, CompLevel cur_level) {
+ if (cur_level == CompLevel_none) {
+ // If there is a live OSR method that means that we deopted to the interpreter
+ // for the transition. Reuse the level of that OSR method.
+ CompLevel osr_level = (CompLevel)method->highest_osr_comp_level();
+ if (osr_level > CompLevel_none) {
+ return osr_level;
+ }
+ }
+ return common(&AdvancedThresholdPolicy::loop_predicate, method, cur_level);
+}
+
+// Update the rate and submit compile. bci == InvocationEntryBci requests a normal-entry compile; any other bci requests an OSR compile.
+void AdvancedThresholdPolicy::submit_compile(methodHandle mh, int bci, CompLevel level, TRAPS) {
+ int hot_count = (bci == InvocationEntryBci) ? mh->invocation_count() : mh->backedge_count(); // Counter matching the kind of compile being requested
+ update_rate(os::javaTimeMillis(), mh());
+ CompileBroker::compile_method(mh, bci, level, mh, hot_count, "tiered", THREAD);
+}
+
+
+// Handle the invocation event: start interpreter profiling if warranted, then request a normal-entry compile if call_event() picks a different level. imh is not used here.
+void AdvancedThresholdPolicy::method_invocation_event(methodHandle mh, methodHandle imh,
+ CompLevel level, TRAPS) {
+ if (should_create_mdo(mh(), level)) {
+ create_mdo(mh, THREAD);
+ }
+ if (is_compilation_enabled() && !CompileBroker::compilation_is_in_queue(mh, InvocationEntryBci)) { // Skip if a normal-entry compile is already queued
+ CompLevel next_level = call_event(mh(), level);
+ if (next_level != level) {
+ compile(mh, InvocationEntryBci, next_level, THREAD);
+ }
+ }
+}
+
+// Handle the back branch event. Notice that we can compile the method
+// with a regular entry from here. imh is not used here.
+void AdvancedThresholdPolicy::method_back_branch_event(methodHandle mh, methodHandle imh,
+ int bci, CompLevel level, TRAPS) {
+ if (should_create_mdo(mh(), level)) {
+ create_mdo(mh, THREAD);
+ }
+
+ // If the method is already compiling, quickly bail out.
+ if (is_compilation_enabled() && !CompileBroker::compilation_is_in_queue(mh, bci)) {
+ // Use loop event as an opportunity to also check there's been
+ // enough calls.
+ CompLevel cur_level = comp_level(mh());
+ CompLevel next_level = call_event(mh(), cur_level);
+ CompLevel next_osr_level = loop_event(mh(), level);
+ if (next_osr_level == CompLevel_limited_profile) {
+ next_osr_level = CompLevel_full_profile; // OSRs are supposed to be for very hot methods.
+ }
+ next_level = MAX2(next_level,
+ next_osr_level < CompLevel_full_optimization ? next_osr_level : cur_level); // An OSR level below 4 may also raise the normal-entry level; a level 4 OSR result does not
+ bool is_compiling = false;
+ if (next_level != cur_level) {
+ compile(mh, InvocationEntryBci, next_level, THREAD);
+ is_compiling = true;
+ }
+
+ // Do the OSR version, but only if no normal-entry compile was requested above
+ if (!is_compiling && next_osr_level != level) {
+ compile(mh, bci, next_osr_level, THREAD);
+ }
+ }
+}
+
+#endif // TIERED
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/runtime/advancedThresholdPolicy.hpp Fri Mar 04 22:44:50 2011 -0800
@@ -0,0 +1,207 @@
+/*
+* Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved.
+* ORACLE PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
+*/
+
+#ifndef SHARE_VM_RUNTIME_ADVANCEDTHRESHOLDPOLICY_HPP
+#define SHARE_VM_RUNTIME_ADVANCEDTHRESHOLDPOLICY_HPP
+
+#include "runtime/simpleThresholdPolicy.hpp"
+
+#ifdef TIERED
+class CompileTask;
+class CompileQueue;
+
+/*
+ * The system supports 5 execution levels:
+ * * level 0 - interpreter
+ * * level 1 - C1 with full optimization (no profiling)
+ * * level 2 - C1 with invocation and backedge counters
+ * * level 3 - C1 with full profiling (level 2 + MDO)
+ * * level 4 - C2
+ *
+ * Levels 0, 2 and 3 periodically notify the runtime about the current value of the counters
+ * (invocation counters and backedge counters). The frequency of these notifications is
+ * different at each level. These notifications are used by the policy to decide what transition
+ * to make.
+ *
+ * Execution starts at level 0 (interpreter), then the policy can decide either to compile the
+ * method at level 3 or level 2. The decision is based on the following factors:
+ * 1. The length of the C2 queue determines the next level. The observation is that level 2
+ * is generally faster than level 3 by about 30%, therefore we would want to minimize the time
+ * a method spends at level 3. We should only spend the time at level 3 that is necessary to get
+ * adequate profiling. So, if the C2 queue is long enough it is more beneficial to go first to
+ * level 2, because if we transitioned to level 3 we would be stuck there until our C2 compile
+ * request makes its way through the long queue. When the load on C2 recedes we are going to
+ * recompile at level 3 and start gathering profiling information.
+ * 2. The length of C1 queue is used to dynamically adjust the thresholds, so as to introduce
+ * additional filtering if the compiler is overloaded. The rationale is that by the time a
+ * method gets compiled it can become unused, so it doesn't make sense to put too much onto the
+ * queue.
+ *
+ * After profiling is completed at level 3 the transition is made to level 4. Again, the length
+ * of the C2 queue is used as a feedback to adjust the thresholds.
+ *
+ * After the first C1 compile some basic information is determined about the code like the number
+ * of the blocks and the number of the loops. Based on that it can be decided that a method
+ * is trivial and compiling it with C1 will yield the same code. In this case the method is
+ * compiled at level 1 instead of 4.
+ *
+ * We also support profiling at level 0. If C1 is slow to produce the level 3 version of
+ * the code and the C2 queue is sufficiently small we can decide to start profiling in the
+ * interpreter (and continue profiling in the compiled code once the level 3 version arrives).
+ * If the profiling at level 0 is fully completed before level 3 version is produced, a level 2
+ * version is compiled instead in order to run faster waiting for a level 4 version.
+ *
+ * Compile queues are implemented as priority queues - for each method in the queue we compute
+ * the event rate (the number of invocation and backedge counter increments per unit of time).
+ * When getting an element off the queue we pick the one with the largest rate. Maintaining the
+ * rate also allows us to remove stale methods (the ones that got on the queue but stopped
+ * being used shortly after that).
+*/
+
+/* Command line options:
+ * - Tier?InvokeNotifyFreqLog and Tier?BackedgeNotifyFreqLog control the frequency of method
+ * invocation and backedge notifications. Basically every n-th invocation or backedge a mutator thread
+ * makes a call into the runtime.
+ *
+ * - Tier?CompileThreshold, Tier?BackEdgeThreshold, Tier?MinInvocationThreshold control
+ * compilation thresholds.
+ * Level 2 thresholds are not used and are provided for option-compatibility and potential future use.
+ * Other thresholds work as follows:
+ *
+ * Transition from interpreter (level 0) to C1 with full profiling (level 3) happens when
+ * the following predicate is true (X is the level):
+ *
+ * i > TierXInvocationThreshold * s || (i > TierXMinInvocationThreshold * s && i + b > TierXCompileThreshold * s),
+ *
+ * where $i$ is the number of method invocations, $b$ number of backedges and $s$ is the scaling
+ * coefficient that will be discussed further.
+ * The intuition is to equalize the time that is spent profiling each method.
+ * The same predicate is used to control the transition from level 3 to level 4 (C2). It should be
+ * noted though that the thresholds are relative. Moreover i and b for the 0->3 transition come
+ * from methodOop and for 3->4 transition they come from MDO (since profiled invocations are
+ * counted separately).
+ *
+ * OSR transitions are controlled simply with b > TierXBackEdgeThreshold * s predicates.
+ *
+ * - Tier?LoadFeedback options are used to automatically scale the predicates described above depending
+ * on the compiler load. The scaling coefficients are computed as follows:
+ *
+ * s = queue_size_X / (TierXLoadFeedback * compiler_count_X) + 1,
+ *
+ * where queue_size_X is the current size of the compiler queue of level X, and compiler_count_X
+ * is the number of level X compiler threads.
+ *
+ * Basically these parameters describe how many methods should be in the compile queue
+ * per compiler thread before the scaling coefficient increases by one.
+ *
+ * This feedback provides the mechanism to automatically control the flow of compilation requests
+ * depending on the machine speed, mutator load and other external factors.
+ *
+ * - Tier3DelayOn and Tier3DelayOff parameters control another important feedback loop.
+ * Consider the following observation: a method compiled with full profiling (level 3)
+ * is about 30% slower than a method at level 2 (just invocation and backedge counters, no MDO).
+ * Normally, the following transitions will occur: 0->3->4. The problem arises when the C2 queue
+ * gets congested and the 3->4 transition is delayed. While the method is in the C2 queue it continues
+ * executing at level 3 for much longer time than is required by the predicate and at suboptimal speed.
+ * The idea is to dynamically change the behavior of the system in such a way that if a substantial
+ * load on C2 is detected we would first do the 0->2 transition allowing a method to run faster.
+ * Then, when the load decreases, we would allow the 2->3 transitions.
+ *
+ * Tier3Delay* parameters control this switching mechanism.
+ * Tier3DelayOn is the number of methods in the C2 queue per compiler thread after which the policy
+ * no longer does 0->3 transitions but does 0->2 transitions instead.
+ * Tier3DelayOff switches the original behavior back when the number of methods in the C2 queue
+ * per compiler thread falls below the specified amount.
+ * The hysteresis is necessary to avoid jitter.
+ *
+ * - TieredCompileTaskTimeout is the amount of time an idle method can spend in the compile queue.
+ * Basically, since we use the event rate d(i + b)/dt as a value of priority when selecting a method to
+ * compile from the compile queue, we also can detect stale methods for which the rate has been
+ * 0 for some time in the same iteration. Stale methods can appear in the queue when an application
+ * abruptly changes its behavior.
+ *
+ * - TieredStopAtLevel is used mostly for testing. It allows bypassing the policy logic and sticking
+ * to a given level. For example it's useful to set TieredStopAtLevel = 1 in order to compile everything
+ * with pure c1.
+ *
+ * - Tier0ProfilingStartPercentage allows the interpreter to start profiling when the inequalities in the
+ * 0->3 predicate are already exceeded by the given percentage but the level 3 version of the
+ * method is still not ready. We can even go directly from level 0 to 4 if c1 doesn't produce a compiled
+ * version in time. This reduces the overall transition to level 4 and decreases the startup time.
+ * Note that this behavior is also guarded by the Tier3Delay mechanism: when the c2 queue is too long
+ * there is no reason to start profiling prematurely.
+ *
+ * - TieredRateUpdateMinTime and TieredRateUpdateMaxTime are parameters of the rate computation.
+ * Basically, the rate is not computed more frequently than TieredRateUpdateMinTime and is considered
+ * to be zero if no events occurred in TieredRateUpdateMaxTime.
+ */
+
+
+class AdvancedThresholdPolicy : public SimpleThresholdPolicy {
+ jlong _start_time; // Set in initialize(); reference time for the first rate measurement of a method
+
+ // Call and loop predicates determine whether a transition to a higher compilation
+ // level should be performed (pointers to predicate functions are passed to common()).
+ // Predicates also take compiler load into account.
+ typedef bool (AdvancedThresholdPolicy::*Predicate)(int i, int b, CompLevel cur_level);
+ bool call_predicate(int i, int b, CompLevel cur_level);
+ bool loop_predicate(int i, int b, CompLevel cur_level);
+ // Common transition function. Given a predicate determines if a method should transition to another level.
+ CompLevel common(Predicate p, methodOop method, CompLevel cur_level);
+ // Transition functions.
+ // call_event determines if a method should be compiled at a different
+ // level with a regular invocation entry.
+ CompLevel call_event(methodOop method, CompLevel cur_level);
+ // loop_event checks if a method should be OSR compiled at a different
+ // level.
+ CompLevel loop_event(methodOop method, CompLevel cur_level);
+ // Has a method been around for a long time?
+ // We don't remove old methods from the compile queue even if they have
+ // very low activity (see select_task()).
+ inline bool is_old(methodOop method);
+ // Was a given method inactive for a given number of milliseconds?
+ // If it was, we would remove it from the queue (see select_task()).
+ inline bool is_stale(jlong t, jlong timeout, methodOop m);
+ // Compute the weight of the method for the compilation scheduling
+ inline double weight(methodOop method);
+ // Apply heuristics and return true if x should be compiled before y
+ inline bool compare_methods(methodOop x, methodOop y);
+ // Compute event rate for a given method. The rate is the number of events (invocations + backedges)
+ // per millisecond.
+ inline void update_rate(jlong t, methodOop m);
+ // Compute threshold scaling coefficient
+ inline double threshold_scale(CompLevel level, int feedback_k);
+ // If a method is old enough and is still in the interpreter we would want to
+ // start profiling without waiting for the compiled method to arrive. This function
+ // determines whether we should do that.
+ inline bool should_create_mdo(methodOop method, CompLevel cur_level);
+ // Create MDO if necessary.
+ void create_mdo(methodHandle mh, TRAPS);
+ // Is method profiled enough?
+ bool is_method_profiled(methodOop method);
+
+protected:
+ void print_specific(EventType type, methodHandle mh, methodHandle imh, int bci, CompLevel level);
+
+ void set_start_time(jlong t) { _start_time = t; }
+ jlong start_time() const { return _start_time; }
+
+ // Submit a given method for compilation (and update the rate).
+ virtual void submit_compile(methodHandle mh, int bci, CompLevel level, TRAPS);
+ // event() from SimpleThresholdPolicy would call these.
+ virtual void method_invocation_event(methodHandle method, methodHandle inlinee,
+ CompLevel level, TRAPS);
+ virtual void method_back_branch_event(methodHandle method, methodHandle inlinee,
+ int bci, CompLevel level, TRAPS);
+public:
+ AdvancedThresholdPolicy() : _start_time(0) { }
+ // Select task is called by CompileBroker. We should return a task or NULL.
+ virtual CompileTask* select_task(CompileQueue* compile_queue);
+ virtual void initialize();
+};
+
+#endif // TIERED
+
+#endif // SHARE_VM_RUNTIME_ADVANCEDTHRESHOLDPOLICY_HPP
--- a/hotspot/src/share/vm/runtime/arguments.cpp Fri Mar 04 20:01:48 2011 -0800
+++ b/hotspot/src/share/vm/runtime/arguments.cpp Fri Mar 04 22:44:50 2011 -0800
@@ -1026,8 +1026,9 @@
}
void Arguments::set_tiered_flags() {
+ // With tiered, set default policy to AdvancedThresholdPolicy, which is 3.
if (FLAG_IS_DEFAULT(CompilationPolicyChoice)) {
- FLAG_SET_DEFAULT(CompilationPolicyChoice, 2);
+ FLAG_SET_DEFAULT(CompilationPolicyChoice, 3);
}
if (CompilationPolicyChoice < 2) {
vm_exit_during_initialization(
--- a/hotspot/src/share/vm/runtime/compilationPolicy.cpp Fri Mar 04 20:01:48 2011 -0800
+++ b/hotspot/src/share/vm/runtime/compilationPolicy.cpp Fri Mar 04 22:44:50 2011 -0800
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -32,6 +32,7 @@
#include "oops/methodOop.hpp"
#include "oops/oop.inline.hpp"
#include "prims/nativeLookup.hpp"
+#include "runtime/advancedThresholdPolicy.hpp"
#include "runtime/compilationPolicy.hpp"
#include "runtime/frame.hpp"
#include "runtime/handles.inline.hpp"
@@ -72,8 +73,15 @@
Unimplemented();
#endif
break;
+ case 3:
+#ifdef TIERED
+ CompilationPolicy::set_policy(new AdvancedThresholdPolicy());
+#else
+ Unimplemented();
+#endif
+ break;
default:
- fatal("CompilationPolicyChoice must be in the range: [0-2]");
+ fatal("CompilationPolicyChoice must be in the range: [0-3]");
}
CompilationPolicy::policy()->initialize();
}