8063137: Never-taken branches should be pruned when GWT LambdaForms are shared
Reviewed-by: jrose, kvn
--- a/hotspot/src/share/vm/ci/ciMethod.cpp Wed Jan 28 07:55:27 2015 +0100
+++ b/hotspot/src/share/vm/ci/ciMethod.cpp Thu Jan 29 10:25:59 2015 -0800
@@ -70,7 +70,8 @@
// Loaded method.
ciMethod::ciMethod(methodHandle h_m, ciInstanceKlass* holder) :
ciMetadata(h_m()),
- _holder(holder)
+ _holder(holder),
+ _has_injected_profile(false)
{
assert(h_m() != NULL, "no null method");
@@ -168,7 +169,8 @@
_liveness( NULL),
_can_be_statically_bound(false),
_method_blocks( NULL),
- _method_data( NULL)
+ _method_data( NULL),
+ _has_injected_profile( false)
#if defined(COMPILER2) || defined(SHARK)
,
_flow( NULL),
--- a/hotspot/src/share/vm/ci/ciMethod.hpp Wed Jan 28 07:55:27 2015 +0100
+++ b/hotspot/src/share/vm/ci/ciMethod.hpp Thu Jan 29 10:25:59 2015 -0800
@@ -79,6 +79,7 @@
bool _is_c1_compilable;
bool _is_c2_compilable;
bool _can_be_statically_bound;
+ bool _has_injected_profile;
// Lazy fields, filled in on demand
address _code;
@@ -286,6 +287,9 @@
int instructions_size();
int scale_count(int count, float prof_factor = 1.); // make MDO count commensurate with IIC
+ bool has_injected_profile() const { return _has_injected_profile; } // flag getter; Compile::too_many_traps()/too_many_recompiles() return false while it is set
+ void set_injected_profile(bool x) { _has_injected_profile = x; } // flag setter; LibraryCallKit::inline_profileBoolean() passes true once constant profile counts are found
+
// Stack walking support
bool is_ignored_by_security_stack_walk() const;
--- a/hotspot/src/share/vm/classfile/vmSymbols.hpp Wed Jan 28 07:55:27 2015 +0100
+++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp Thu Jan 29 10:25:59 2015 -0800
@@ -243,7 +243,6 @@
template(returnType_name, "returnType") \
template(signature_name, "signature") \
template(slot_name, "slot") \
- template(selectAlternative_name, "selectAlternative") \
\
/* Support for annotations (JDK 1.5 and above) */ \
\
@@ -295,8 +294,7 @@
template(setTarget_signature, "(Ljava/lang/invoke/MethodHandle;)V") \
NOT_LP64( do_alias(intptr_signature, int_signature) ) \
LP64_ONLY( do_alias(intptr_signature, long_signature) ) \
- template(selectAlternative_signature, "(ZLjava/lang/invoke/MethodHandle;Ljava/lang/invoke/MethodHandle;)Ljava/lang/invoke/MethodHandle;") \
- \
+ \
/* common method and field names */ \
template(object_initializer_name, "<init>") \
template(class_initializer_name, "<clinit>") \
@@ -868,6 +866,12 @@
do_name( fullFence_name, "fullFence") \
do_alias( fullFence_signature, void_method_signature) \
\
+ /* Custom branch frequencies profiling support for JSR292 */ \
+ do_class(java_lang_invoke_MethodHandleImpl, "java/lang/invoke/MethodHandleImpl") \
+ do_intrinsic(_profileBoolean, java_lang_invoke_MethodHandleImpl, profileBoolean_name, profileBoolean_signature, F_S) \
+ do_name( profileBoolean_name, "profileBoolean") \
+ do_signature(profileBoolean_signature, "(Z[I)Z") \
+ \
/* unsafe memory references (there are a lot of them...) */ \
do_signature(getObject_signature, "(Ljava/lang/Object;J)Ljava/lang/Object;") \
do_signature(putObject_signature, "(Ljava/lang/Object;JLjava/lang/Object;)V") \
--- a/hotspot/src/share/vm/opto/classes.hpp Wed Jan 28 07:55:27 2015 +0100
+++ b/hotspot/src/share/vm/opto/classes.hpp Thu Jan 29 10:25:59 2015 -0800
@@ -200,6 +200,7 @@
macro(Opaque1)
macro(Opaque2)
macro(Opaque3)
+macro(ProfileBoolean)
macro(OrI)
macro(OrL)
macro(OverflowAddI)
--- a/hotspot/src/share/vm/opto/compile.cpp Wed Jan 28 07:55:27 2015 +0100
+++ b/hotspot/src/share/vm/opto/compile.cpp Thu Jan 29 10:25:59 2015 -0800
@@ -3105,6 +3105,7 @@
default:
assert( !n->is_Call(), "" );
assert( !n->is_Mem(), "" );
+ assert( nop != Op_ProfileBoolean, "should be eliminated during IGVN");
break;
}
@@ -3321,6 +3322,9 @@
bool Compile::too_many_traps(ciMethod* method,
int bci,
Deoptimization::DeoptReason reason) {
+ if (method->has_injected_profile()) {
+ return false;
+ }
ciMethodData* md = method->method_data();
if (md->is_empty()) {
// Assume the trap has not occurred, or that it occurred only
@@ -3370,6 +3374,9 @@
bool Compile::too_many_recompiles(ciMethod* method,
int bci,
Deoptimization::DeoptReason reason) {
+ if (method->has_injected_profile()) {
+ return false;
+ }
ciMethodData* md = method->method_data();
if (md->is_empty()) {
// Assume the trap has not occurred, or that it occurred only
--- a/hotspot/src/share/vm/opto/graphKit.hpp Wed Jan 28 07:55:27 2015 +0100
+++ b/hotspot/src/share/vm/opto/graphKit.hpp Thu Jan 29 10:25:59 2015 -0800
@@ -714,6 +714,15 @@
klass, reason_string, must_throw, keep_exact_action);
}
+  // Take an uncommon trap (bail out to the interpreter) for the given
+  // reason/action pair. The trap request is forwarded with keep_exact_action
+  // switched on, so the requested action is kept (avoid switching to Action_none).
+  void uncommon_trap_exact(Deoptimization::DeoptReason reason,
+                           Deoptimization::DeoptAction action,
+                           ciKlass* klass = NULL, const char* reason_string = NULL,
+                           bool must_throw = false) {
+    const bool keep_exact_action = true;
+    uncommon_trap(Deoptimization::make_trap_request(reason, action),
+                  klass, reason_string, must_throw, keep_exact_action);
+  }
+
// SP when bytecode needs to be reexecuted.
virtual int reexecute_sp() { return sp(); }
--- a/hotspot/src/share/vm/opto/library_call.cpp Wed Jan 28 07:55:27 2015 +0100
+++ b/hotspot/src/share/vm/opto/library_call.cpp Thu Jan 29 10:25:59 2015 -0800
@@ -41,6 +41,7 @@
#include "opto/movenode.hpp"
#include "opto/mulnode.hpp"
#include "opto/narrowptrnode.hpp"
+#include "opto/opaquenode.hpp"
#include "opto/parse.hpp"
#include "opto/runtime.hpp"
#include "opto/subnode.hpp"
@@ -287,6 +288,8 @@
bool inline_updateBytesCRC32();
bool inline_updateByteBufferCRC32();
bool inline_multiplyToLen();
+
+ bool inline_profileBoolean();
};
@@ -900,6 +903,9 @@
case vmIntrinsics::_updateByteBufferCRC32:
return inline_updateByteBufferCRC32();
+ case vmIntrinsics::_profileBoolean:
+ return inline_profileBoolean();
+
default:
// If you get here, it may be that someone has added a new intrinsic
// to the list in vmSymbols.hpp without implementing it here.
@@ -5867,3 +5873,47 @@
return instof_false; // even if it is NULL
}
+
+bool LibraryCallKit::inline_profileBoolean() {
+  // The intrinsic only applies when the counters array (second argument) is a
+  // compile-time constant array of length 2. In every other case profile data
+  // isn't available at the moment, so report failure and let the method's
+  // bytecode version run, which continues profiling.
+  // Usually, when GWT LambdaForms are profiled, it means that a stand-alone
+  // nmethod is compiled and the counters aren't available since the
+  // corresponding MethodHandle isn't a compile-time constant.
+  Node* counts = argument(1);
+  if (!counts->is_Con()) {
+    return false;
+  }
+  const TypeAryPtr* ary = counts->bottom_type()->isa_aryptr();
+  if (ary == NULL) {
+    return false;
+  }
+  ciArray* aobj = ary->const_oop()->as_array();
+  if (aobj == NULL || aobj->length() != 2) {
+    return false;
+  }
+
+  // The profile is int[2]: slot [0] holds the number of false occurrences,
+  // slot [1] the number of true occurrences.
+  const jint false_cnt = aobj->element_value(0).as_int();
+  const jint true_cnt  = aobj->element_value(1).as_int();
+
+  // Remember on the method being compiled that it carries an injected profile.
+  method()->set_injected_profile(true);
+
+  if (C->log() != NULL) {
+    C->log()->elem("observe source='profileBoolean' false='%d' true='%d'",
+                   false_cnt, true_cnt);
+  }
+
+  if (false_cnt + true_cnt == 0) {
+    // The profile says this code was never executed: trap instead of compiling it.
+    uncommon_trap_exact(Deoptimization::Reason_intrinsic,
+                        Deoptimization::Action_reinterpret);
+    return true;
+  }
+
+  // Stop profiling.
+  // MethodHandleImpl::profileBoolean() has the profiling logic in its bytecode.
+  // Replacing the method's body with the profile data (a ProfileBooleanNode on
+  // the IR level) effectively disables profiling, which enables full speed
+  // execution once optimized code is generated.
+  Node* value   = argument(0);
+  Node* profile = _gvn.transform(new ProfileBooleanNode(value, false_cnt, true_cnt));
+  C->record_for_igvn(profile);
+  set_result(profile);
+  return true;
+}
--- a/hotspot/src/share/vm/opto/opaquenode.cpp Wed Jan 28 07:55:27 2015 +0100
+++ b/hotspot/src/share/vm/opto/opaquenode.cpp Thu Jan 29 10:25:59 2015 -0800
@@ -60,4 +60,27 @@
return (&n == this); // Always fail except on self
}
+//=============================================================================
+// Every ProfileBooleanNode reports NO_HASH
+// (presumably so the node is never value-numbered - TODO confirm).
+uint ProfileBooleanNode::hash() const {
+  return NO_HASH;
+}
+// Identity comparison: a ProfileBooleanNode is equal to itself only.
+uint ProfileBooleanNode::cmp( const Node &n ) const {
+  const bool same_node = (this == &n);
+  return same_node;
+}
+
+// The first call made with can_reshape set while removal is still delayed
+// clears _delay_removal and returns this node; every other call returns NULL.
+Node *ProfileBooleanNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+  if (!can_reshape || !_delay_removal) {
+    return NULL;
+  }
+  _delay_removal = false;
+  return this;
+}
+
+// While removal is delayed the node stands for itself. Once the delay has been
+// lifted the node is replaced by its input in(1); by then consume() must
+// already have been called, which the assert checks.
+Node *ProfileBooleanNode::Identity( PhaseTransform *phase ) {
+  if (!_delay_removal) {
+    assert(_consumed, "profile should be consumed before elimination");
+    return in(1);
+  }
+  return this;
+}
--- a/hotspot/src/share/vm/opto/opaquenode.hpp Wed Jan 28 07:55:27 2015 +0100
+++ b/hotspot/src/share/vm/opto/opaquenode.hpp Thu Jan 29 10:25:59 2015 -0800
@@ -87,5 +87,31 @@
bool rtm_opt() const { return (_opt == RTM_OPT); }
};
+//------------------------------ProfileBooleanNode-------------------------------
+// Represents the value profile of a boolean during parsing: the node wraps the
+// profiled value 'n' together with the number of times it was seen false/true.
+// Once parsing is over, the node goes away (during IGVN).
+// It is used to override branch frequencies from MDO (see has_injected_profile in parse2.cpp).
+class ProfileBooleanNode : public Node {
+  uint _false_cnt;      // occurrences of 'false'
+  uint _true_cnt;       // occurrences of 'true'
+  bool _consumed;       // set by consume(); Identity() asserts it before removing the node
+  bool _delay_removal;  // while true Identity() keeps the node; cleared by Ideal() when can_reshape is set
+  virtual uint hash() const ;                  // { return NO_HASH; }
+  virtual uint cmp( const Node &n ) const;
+  public:
+  // n         - the profiled boolean value
+  // false_cnt - number of 'false' occurrences
+  // true_cnt  - number of 'true' occurrences
+  // The initializers are listed in member declaration order, which is the
+  // order C++ performs them in regardless of how the list is written.
+  ProfileBooleanNode(Node *n, uint false_cnt, uint true_cnt) : Node(0, n),
+    _false_cnt(false_cnt), _true_cnt(true_cnt), _consumed(false), _delay_removal(true) {}
+
+  uint false_count() const { return _false_cnt; }
+  uint true_count()  const { return _true_cnt; }
+
+  // Records that the profile has been used.
+  void consume() { _consumed = true; }
+
+  virtual int Opcode() const;
+  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+  virtual Node *Identity(PhaseTransform *phase);
+  virtual const Type *bottom_type() const { return TypeInt::BOOL; }
+};
+
#endif // SHARE_VM_OPTO_OPAQUENODE_HPP
--- a/hotspot/src/share/vm/opto/parse.hpp Wed Jan 28 07:55:27 2015 +0100
+++ b/hotspot/src/share/vm/opto/parse.hpp Thu Jan 29 10:25:59 2015 -0800
@@ -555,8 +555,8 @@
void do_jsr();
void do_ret();
- float dynamic_branch_prediction(float &cnt);
- float branch_prediction(float &cnt, BoolTest::mask btest, int target_bci);
+ float dynamic_branch_prediction(float &cnt, BoolTest::mask btest, Node* test);
+ float branch_prediction(float &cnt, BoolTest::mask btest, int target_bci, Node* test);
bool seems_never_taken(float prob) const;
bool path_is_suitable_for_uncommon_trap(float prob) const;
bool seems_stable_comparison() const;
--- a/hotspot/src/share/vm/opto/parse2.cpp Wed Jan 28 07:55:27 2015 +0100
+++ b/hotspot/src/share/vm/opto/parse2.cpp Thu Jan 29 10:25:59 2015 -0800
@@ -37,6 +37,7 @@
#include "opto/matcher.hpp"
#include "opto/memnode.hpp"
#include "opto/mulnode.hpp"
+#include "opto/opaquenode.hpp"
#include "opto/parse.hpp"
#include "opto/runtime.hpp"
#include "runtime/deoptimization.hpp"
@@ -763,35 +764,64 @@
merge_common(target, pnum);
}
+// Looks for an injected boolean profile feeding the given test.
+// Succeeds only when btest is ::eq or ::ne and 'test' is a Cmp whose first
+// input is a ProfileBooleanNode. On success the profile's counts are stored
+// into 'taken' / 'not_taken', the profile is marked as consumed and true is
+// returned. Otherwise false is returned and both outputs are left untouched.
+static bool has_injected_profile(BoolTest::mask btest, Node* test, int& taken, int& not_taken) {
+  const bool is_eq = (btest == BoolTest::eq);
+  if (!is_eq && btest != BoolTest::ne) {
+    // Only ::eq and ::ne are supported for profile injection.
+    return false;
+  }
+  if (!test->is_Cmp()) {
+    return false;
+  }
+  Node* candidate = test->in(1);
+  if (candidate->Opcode() != Op_ProfileBoolean) {
+    return false;
+  }
+
+  ProfileBooleanNode* profile = (ProfileBooleanNode*)candidate;
+  const int false_cnt = profile->false_count();
+  const int true_cnt  = profile->true_count();
+
+  // Which counter means "taken" depends on the actual test operation
+  // (::eq or ::ne). The counts are not scaled because profile injection
+  // was designed to feed exact counts into the VM.
+  if (is_eq) {
+    taken     = false_cnt;
+    not_taken = true_cnt;
+  } else {
+    taken     = true_cnt;
+    not_taken = false_cnt;
+  }
+
+  profile->consume();
+  return true;
+}
//--------------------------dynamic_branch_prediction--------------------------
// Try to gather dynamic branch prediction behavior. Return a probability
// of the branch being taken and set the "cnt" field. Returns a -1.0
// if we need to use static prediction for some reason.
-float Parse::dynamic_branch_prediction(float &cnt) {
+float Parse::dynamic_branch_prediction(float &cnt, BoolTest::mask btest, Node* test) {
ResourceMark rm;
cnt = COUNT_UNKNOWN;
- // Use MethodData information if it is available
- // FIXME: free the ProfileData structure
- ciMethodData* methodData = method()->method_data();
- if (!methodData->is_mature()) return PROB_UNKNOWN;
- ciProfileData* data = methodData->bci_to_data(bci());
- if (!data->is_JumpData()) return PROB_UNKNOWN;
+ int taken = 0;
+ int not_taken = 0;
+
+ bool use_mdo = !has_injected_profile(btest, test, taken, not_taken);
- // get taken and not taken values
- int taken = data->as_JumpData()->taken();
- int not_taken = 0;
- if (data->is_BranchData()) {
- not_taken = data->as_BranchData()->not_taken();
+ if (use_mdo) {
+ // Use MethodData information if it is available
+ // FIXME: free the ProfileData structure
+ ciMethodData* methodData = method()->method_data();
+ if (!methodData->is_mature()) return PROB_UNKNOWN;
+ ciProfileData* data = methodData->bci_to_data(bci());
+ if (!data->is_JumpData()) return PROB_UNKNOWN;
+
+ // get taken and not taken values
+ taken = data->as_JumpData()->taken();
+ not_taken = 0;
+ if (data->is_BranchData()) {
+ not_taken = data->as_BranchData()->not_taken();
+ }
+
+ // scale the counts to be commensurate with invocation counts:
+ taken = method()->scale_count(taken);
+ not_taken = method()->scale_count(not_taken);
}
- // scale the counts to be commensurate with invocation counts:
- taken = method()->scale_count(taken);
- not_taken = method()->scale_count(not_taken);
-
// Give up if too few (or too many, in which case the sum will overflow) counts to be meaningful.
- // We also check that individual counters are positive first, overwise the sum can become positive.
+ // We also check that individual counters are positive first, otherwise the sum can become positive.
if (taken < 0 || not_taken < 0 || taken + not_taken < 40) {
if (C->log() != NULL) {
C->log()->elem("branch target_bci='%d' taken='%d' not_taken='%d'", iter().get_dest(), taken, not_taken);
@@ -841,8 +871,9 @@
//-----------------------------branch_prediction-------------------------------
float Parse::branch_prediction(float& cnt,
BoolTest::mask btest,
- int target_bci) {
- float prob = dynamic_branch_prediction(cnt);
+ int target_bci,
+ Node* test) {
+ float prob = dynamic_branch_prediction(cnt, btest, test);
// If prob is unknown, switch to static prediction
if (prob != PROB_UNKNOWN) return prob;
@@ -932,7 +963,7 @@
Block* next_block = successor_for_bci(iter().next_bci());
float cnt;
- float prob = branch_prediction(cnt, btest, target_bci);
+ float prob = branch_prediction(cnt, btest, target_bci, c);
if (prob == PROB_UNKNOWN) {
// (An earlier version of do_ifnull omitted this trap for OSR methods.)
#ifndef PRODUCT
@@ -1013,7 +1044,7 @@
Block* next_block = successor_for_bci(iter().next_bci());
float cnt;
- float prob = branch_prediction(cnt, btest, target_bci);
+ float prob = branch_prediction(cnt, btest, target_bci, c);
float untaken_prob = 1.0 - prob;
if (prob == PROB_UNKNOWN) {