8203197: C2: consider all paths in loop body for loop predication
Reviewed-by: kvn, neliasso
--- a/src/hotspot/share/oops/methodData.hpp Tue Jun 19 08:44:31 2018 +0200
+++ b/src/hotspot/share/oops/methodData.hpp Tue Jun 19 09:08:39 2018 +0200
@@ -1976,7 +1976,7 @@
// Whole-method sticky bits and flags
enum {
- _trap_hist_limit = 23 JVMCI_ONLY(+5), // decoupled from Deoptimization::Reason_LIMIT
+ _trap_hist_limit = 24 JVMCI_ONLY(+5), // decoupled from Deoptimization::Reason_LIMIT
_trap_hist_mask = max_jubyte,
_extra_data_count = 4 // extra DataLayout headers, for trap history
}; // Public flag values
--- a/src/hotspot/share/opto/c2_globals.hpp Tue Jun 19 08:44:31 2018 +0200
+++ b/src/hotspot/share/opto/c2_globals.hpp Tue Jun 19 09:08:39 2018 +0200
@@ -754,6 +754,9 @@
product(uintx, LoopStripMiningIterShortLoop, 0, \
"Loop with fewer iterations are not strip mined") \
range(0, max_juint) \
+ \
+ product(bool, UseProfiledLoopPredicate, true, \
+ "move predicates out of loops based on profiling data") \
C2_FLAGS(DECLARE_DEVELOPER_FLAG, \
DECLARE_PD_DEVELOPER_FLAG, \
--- a/src/hotspot/share/opto/graphKit.cpp Tue Jun 19 08:44:31 2018 +0200
+++ b/src/hotspot/share/opto/graphKit.cpp Tue Jun 19 09:08:39 2018 +0200
@@ -3804,6 +3804,9 @@
if (UseLoopPredicate) {
add_predicate_impl(Deoptimization::Reason_predicate, nargs);
}
+ if (UseProfiledLoopPredicate) {
+ add_predicate_impl(Deoptimization::Reason_profile_predicate, nargs);
+ }
// loop's limit check predicate should be near the loop.
add_predicate_impl(Deoptimization::Reason_loop_limit_check, nargs);
}
--- a/src/hotspot/share/opto/loopPredicate.cpp Tue Jun 19 08:44:31 2018 +0200
+++ b/src/hotspot/share/opto/loopPredicate.cpp Tue Jun 19 09:08:39 2018 +0200
@@ -34,6 +34,8 @@
#include "opto/opaquenode.hpp"
#include "opto/rootnode.hpp"
#include "opto/subnode.hpp"
+#include <fenv.h>
+#include <math.h>
/*
* The general idea of Loop Predication is to insert a predicate on the entry
@@ -318,18 +320,37 @@
if (limit_check_proj != NULL) {
entry = entry->in(0)->in(0);
}
+ ProjNode* profile_predicate_proj = NULL;
+ ProjNode* predicate_proj = NULL;
+ if (UseProfiledLoopPredicate) {
+ profile_predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_profile_predicate);
+ if (profile_predicate_proj != NULL) {
+ entry = skip_loop_predicates(entry);
+ }
+ }
if (UseLoopPredicate) {
- ProjNode* predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
- if (predicate_proj != NULL) { // right pattern that can be used by loop predication
- // clone predicate
- new_entry = clone_predicate(predicate_proj, new_entry,
- Deoptimization::Reason_predicate,
- loop_phase, igvn);
- assert(new_entry != NULL && new_entry->is_Proj(), "IfTrue or IfFalse after clone predicate");
- if (TraceLoopPredicate) {
- tty->print("Loop Predicate cloned: ");
- debug_only( new_entry->in(0)->dump(); )
- }
+ predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
+ }
+ if (predicate_proj != NULL) { // right pattern that can be used by loop predication
+ // clone predicate
+ new_entry = clone_predicate(predicate_proj, new_entry,
+ Deoptimization::Reason_predicate,
+ loop_phase, igvn);
+ assert(new_entry != NULL && new_entry->is_Proj(), "IfTrue or IfFalse after clone predicate");
+ if (TraceLoopPredicate) {
+ tty->print("Loop Predicate cloned: ");
+ debug_only( new_entry->in(0)->dump(); );
+ }
+ }
+ if (profile_predicate_proj != NULL) { // right pattern that can be used by loop predication
+ // clone predicate
+ new_entry = clone_predicate(profile_predicate_proj, new_entry,
+ Deoptimization::Reason_profile_predicate,
+ loop_phase, igvn);
+ assert(new_entry != NULL && new_entry->is_Proj(), "IfTrue or IfFalse after clone predicate");
+ if (TraceLoopPredicate) {
+ tty->print("Loop Predicate cloned: ");
+ debug_only( new_entry->in(0)->dump(); );
}
}
if (limit_check_proj != NULL && clone_limit_check) {
@@ -351,25 +372,36 @@
//--------------------------skip_loop_predicates------------------------------
// Skip related predicates.
Node* PhaseIdealLoop::skip_loop_predicates(Node* entry) { // walks up past the run of predicate Ifs that share entry's uncommon-trap target and returns the control above them
+ IfNode* iff = entry->in(0)->as_If();
+ ProjNode* uncommon_proj = iff->proj_out(1 - entry->as_Proj()->_con); // the other projection of the same If
+ Node* rgn = uncommon_proj->unique_ctrl_out(); // control node reached through that other projection
+ assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct");
+ entry = entry->in(0)->in(0); // step above the first If
+ while (entry != NULL && entry->is_Proj() && entry->in(0)->is_If()) {
+ uncommon_proj = entry->in(0)->as_If()->proj_out(1 - entry->as_Proj()->_con);
+ if (uncommon_proj->unique_ctrl_out() != rgn) // this If's other projection goes elsewhere: end of the run
+ break;
+ entry = entry->in(0)->in(0); // same target: keep climbing
+ }
+ return entry;
+}
+
+Node* PhaseIdealLoop::skip_all_loop_predicates(Node* entry) {
Node* predicate = NULL;
predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
if (predicate != NULL) {
entry = entry->in(0)->in(0);
}
+ if (UseProfiledLoopPredicate) {
+ predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_profile_predicate);
+ if (predicate != NULL) { // right pattern that can be used by loop predication
+ entry = skip_loop_predicates(entry);
+ }
+ }
if (UseLoopPredicate) {
predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
if (predicate != NULL) { // right pattern that can be used by loop predication
- IfNode* iff = entry->in(0)->as_If();
- ProjNode* uncommon_proj = iff->proj_out(1 - entry->as_Proj()->_con);
- Node* rgn = uncommon_proj->unique_ctrl_out();
- assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct");
- entry = entry->in(0)->in(0);
- while (entry != NULL && entry->is_Proj() && entry->in(0)->is_If()) {
- uncommon_proj = entry->in(0)->as_If()->proj_out(1 - entry->as_Proj()->_con);
- if (uncommon_proj->unique_ctrl_out() != rgn)
- break;
- entry = entry->in(0)->in(0);
- }
+ entry = skip_loop_predicates(entry);
}
}
return entry;
@@ -400,6 +432,12 @@
return entry;
}
}
+ if (UseProfiledLoopPredicate) {
+ predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_profile_predicate);
+ if (predicate != NULL) { // right pattern that can be used by loop predication
+ return entry;
+ }
+ }
return NULL;
}
@@ -766,6 +804,413 @@
return bol;
}
+// Should loop predication look not only in the path from tail to head
+// but also in branches of the loop body? On a true result, loop_trip_cnt holds the trip count estimate.
+bool PhaseIdealLoop::loop_predication_should_follow_branches(IdealLoopTree *loop, ProjNode *predicate_proj, float& loop_trip_cnt) {
+ if (!UseProfiledLoopPredicate) {
+ return false;
+ }
+
+ if (predicate_proj == NULL) { // no predicate projection was supplied by the caller
+ return false;
+ }
+
+ LoopNode* head = loop->_head->as_Loop();
+ bool follow_branches = true;
+ IdealLoopTree* l = loop->_child;
+ // Only when every child loop is itself a leaf (after stepping into a strip mined outer loop) and not irreducible
+ while (l != NULL && follow_branches) {
+ IdealLoopTree* child = l;
+ if (child->_child != NULL &&
+ child->_head->is_OuterStripMinedLoop()) {
+ assert(child->_child->_next == NULL, "only one inner loop for strip mined loop");
+ assert(child->_child->_head->is_CountedLoop() && child->_child->_head->as_CountedLoop()->is_strip_mined(), "inner loop should be strip mined");
+ child = child->_child; // look at the inner loop of the strip mined pair instead
+ }
+ if (child->_child != NULL || child->_irreducible) { // deeper nesting or irreducible child: give up
+ follow_branches = false;
+ }
+ l = l->_next; // next sibling child loop
+ }
+ if (follow_branches) {
+ loop->compute_profile_trip_cnt(this);
+ if (head->is_profile_trip_failed()) { // no trip count could be derived from the profile
+ follow_branches = false;
+ } else {
+ loop_trip_cnt = head->profile_trip_cnt();
+ if (head->is_CountedLoop()) {
+ CountedLoopNode* cl = head->as_CountedLoop();
+ if (cl->phi() != NULL) {
+ const TypeInt* t = _igvn.type(cl->phi())->is_int();
+ float worst_case_trip_cnt = ((float)t->_hi - t->_lo) / ABS(cl->stride_con()); // width of the phi's int type range divided by |stride|
+ if (worst_case_trip_cnt < loop_trip_cnt) { // cap the profile estimate by that bound
+ loop_trip_cnt = worst_case_trip_cnt;
+ }
+ }
+ }
+ }
+ }
+ return follow_branches;
+}
+
+// Compute probability of reaching some CFG node from a fixed
+// dominating CFG node
+class PathFrequency {
+private:
+ Node* _dom; // frequencies are computed relative to this node
+ Node_Stack _stack; // nodes visited on the way up whose frequency is still pending
+ GrowableArray<float> _freqs_stack; // keep track of intermediate result at regions
+ GrowableArray<float> _freqs; // cache frequencies, indexed by node _idx; a negative entry means not computed
+ PhaseIdealLoop* _phase; // used for get_loop() queries
+
+public:
+ PathFrequency(Node* dom, PhaseIdealLoop* phase)
+ : _dom(dom), _stack(0), _phase(phase) {
+ }
+
+ float to(Node* n) { // returns the frequency, in [0, 1], of reaching n relative to _dom
+ // post order walk on the CFG graph from n to _dom
+ fesetround(FE_TOWARDZERO); // make sure rounding doesn't push frequency above 1
+ IdealLoopTree* loop = _phase->get_loop(_dom);
+ Node* c = n;
+ for (;;) {
+ assert(_phase->get_loop(c) == loop, "have to be in the same loop");
+ if (c == _dom || _freqs.at_grow(c->_idx, -1) >= 0) { // frequency of c is known: unwind the pending stack
+ float f = c == _dom ? 1 : _freqs.at(c->_idx);
+ Node* prev = c;
+ while (_stack.size() > 0 && prev == c) { // stops as soon as a region hands out another input to walk (c changes)
+ Node* n = _stack.node(); // note: shadows the parameter n
+ if (!n->is_Region()) {
+ if (_phase->get_loop(n) != _phase->get_loop(n->in(0))) {
+ // Found an inner loop: compute frequency of reaching this
+ // exit from the loop head by looking at the number of
+ // times each loop exit was taken
+ IdealLoopTree* inner_loop = _phase->get_loop(n->in(0));
+ LoopNode* inner_head = inner_loop->_head->as_Loop();
+ assert(_phase->get_loop(n) == loop, "only 1 inner loop");
+ if (inner_head->is_OuterStripMinedLoop()) {
+ inner_head->verify_strip_mined(1);
+ if (n->in(0) == inner_head->in(LoopNode::LoopBackControl)->in(0)) {
+ n = n->in(0)->in(0)->in(0); // NOTE(review): n now differs from _stack.node(); the result below is cached under this node's _idx
+ }
+ inner_loop = inner_loop->_child;
+ inner_head = inner_loop->_head->as_Loop();
+ inner_head->verify_strip_mined(1);
+ }
+ fesetround(FE_UPWARD); // round the exit count sum (the divisor below) up so the ratio cannot exceed 1
+ float loop_exit_cnt = 0.0f;
+ for (uint i = 0; i < inner_loop->_body.size(); i++) {
+ Node *n = inner_loop->_body[i]; // note: shadows n again, only inside this for loop
+ float c = inner_loop->compute_profile_trip_cnt_helper(n); // note: this float c shadows the Node* c of the walk
+ loop_exit_cnt += c;
+ }
+ fesetround(FE_TOWARDZERO);
+ float cnt = -1;
+ if (n->in(0)->is_If()) {
+ IfNode* iff = n->in(0)->as_If();
+ float p = n->in(0)->as_If()->_prob;
+ if (n->Opcode() == Op_IfFalse) {
+ p = 1 - p;
+ }
+ if (p > PROB_MIN) {
+ cnt = p * iff->_fcnt;
+ } else {
+ cnt = 0; // probabilities at or below PROB_MIN count as never taken
+ }
+ } else {
+ assert(n->in(0)->is_Jump(), "unsupported node kind");
+ JumpNode* jmp = n->in(0)->as_Jump();
+ float p = n->in(0)->as_Jump()->_probs[n->as_JumpProj()->_con];
+ cnt = p * jmp->_fcnt;
+ }
+ float this_exit_f = cnt > 0 ? cnt / loop_exit_cnt : 0; // this exit's share of all exits taken from the inner loop
+ assert(this_exit_f <= 1 && this_exit_f >= 0, "Incorrect frequency");
+ f = f * this_exit_f;
+ assert(f <= 1 && f >= 0, "Incorrect frequency");
+ } else {
+ float p = -1;
+ if (n->in(0)->is_If()) {
+ p = n->in(0)->as_If()->_prob;
+ if (n->Opcode() == Op_IfFalse) {
+ p = 1 - p;
+ }
+ } else {
+ assert(n->in(0)->is_Jump(), "unsupported node kind");
+ p = n->in(0)->as_Jump()->_probs[n->as_JumpProj()->_con];
+ }
+ f = f * p; // scale by the probability of taking this projection
+ assert(f <= 1 && f >= 0, "Incorrect frequency");
+ }
+ _freqs.at_put_grow(n->_idx, (float)f, -1); // cache the result
+ _stack.pop();
+ } else { // Region: add up the frequencies of its inputs, one input per pass
+ float prev_f = _freqs_stack.pop();
+ float new_f = f;
+ f = new_f + prev_f;
+ assert(f <= 1 && f >= 0, "Incorrect frequency");
+ uint i = _stack.index();
+ if (i < n->req()) { // more inputs left: walk the next one and save the partial sum
+ c = n->in(i);
+ _stack.set_index(i+1);
+ _freqs_stack.push(f);
+ } else { // all inputs summed: cache the region's frequency
+ _freqs.at_put_grow(n->_idx, f, -1);
+ _stack.pop();
+ }
+ }
+ }
+ if (_stack.size() == 0) {
+ fesetround(FE_TONEAREST); // NOTE(review): selects round-to-nearest unconditionally; the mode in effect on entry is not saved
+ assert(f >= 0 && f <= 1, "should have been computed");
+ return f;
+ }
+ } else if (c->is_Loop()) {
+ ShouldNotReachHere();
+ c = c->in(LoopNode::EntryControl);
+ } else if (c->is_Region()) {
+ _freqs_stack.push(0); // partial sum for this region starts at 0
+ _stack.push(c, 2); // in(1) is walked now, so index 2 is the next input to visit
+ c = c->in(1);
+ } else {
+ if (c->is_IfProj()) {
+ IfNode* iff = c->in(0)->as_If();
+ if (iff->_prob == PROB_UNKNOWN) {
+ // assume never taken
+ _freqs.at_put_grow(c->_idx, 0, -1);
+ } else if (_phase->get_loop(c) != _phase->get_loop(iff)) { // c is an exit of an inner loop
+ if (iff->_fcnt == COUNT_UNKNOWN) {
+ // assume never taken
+ _freqs.at_put_grow(c->_idx, 0, -1);
+ } else {
+ // skip over loop
+ _stack.push(c, 1);
+ c = _phase->get_loop(c->in(0))->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl);
+ }
+ } else {
+ _stack.push(c, 1);
+ c = iff;
+ }
+ } else if (c->is_JumpProj()) {
+ JumpNode* jmp = c->in(0)->as_Jump();
+ if (_phase->get_loop(c) != _phase->get_loop(jmp)) { // c is an exit of an inner loop
+ if (jmp->_fcnt == COUNT_UNKNOWN) {
+ // assume never taken
+ _freqs.at_put_grow(c->_idx, 0, -1);
+ } else {
+ // skip over loop
+ _stack.push(c, 1);
+ c = _phase->get_loop(c->in(0))->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl);
+ }
+ } else {
+ _stack.push(c, 1);
+ c = jmp;
+ }
+ } else if (c->Opcode() == Op_CatchProj &&
+ c->in(0)->Opcode() == Op_Catch &&
+ c->in(0)->in(0)->is_Proj() &&
+ c->in(0)->in(0)->in(0)->is_Call()) {
+ // assume exceptions are never thrown
+ uint con = c->as_Proj()->_con;
+ if (con == CatchProjNode::fall_through_index) {
+ Node* call = c->in(0)->in(0)->in(0)->in(0); // control input of the call feeding this Catch
+ if (_phase->get_loop(call) != _phase->get_loop(c)) {
+ _freqs.at_put_grow(c->_idx, 0, -1);
+ } else {
+ c = call; // continue the walk above the call
+ }
+ } else {
+ assert(con >= CatchProjNode::catch_all_index, "what else?");
+ _freqs.at_put_grow(c->_idx, 0, -1);
+ }
+ } else if (c->unique_ctrl_out() == NULL && !c->is_If() && !c->is_Jump()) {
+ ShouldNotReachHere();
+ } else {
+ c = c->in(0); // plain control node: move to its control input
+ }
+ }
+ }
+ ShouldNotReachHere();
+ return -1;
+ }
+};
+
+void PhaseIdealLoop::loop_predication_follow_branches(Node *n, IdealLoopTree *loop, float loop_trip_cnt,
+ PathFrequency& pf, Node_Stack& stack, VectorSet& seen,
+ Node_List& if_proj_list) { // starting at region n, collects into if_proj_list the uncommon-trap If projections on branches that are frequent enough
+ assert(n->is_Region(), "start from a region");
+ Node* tail = loop->tail();
+ stack.push(n, 1);
+ do {
+ Node* c = stack.node();
+ assert(c->is_Region() || c->is_IfProj(), "only region or if projection here"); // both kinds are pushed below
+ uint i = stack.index();
+
+ if (i < c->req()) {
+ stack.set_index(i+1);
+ Node* in = c->in(i);
+ while (!is_dominator(in, tail) && !seen.test_set(in->_idx)) { // climb until a node that dominates the tail or was already visited
+ IdealLoopTree* in_loop = get_loop(in);
+ if (in_loop != loop) { // node belongs to another loop: jump to that loop's entry control
+ in = in_loop->_head->in(LoopNode::EntryControl);
+ } else if (in->is_Region()) { // nested region: its inputs are explored in a later iteration
+ stack.push(in, 1);
+ break;
+ } else if (in->is_IfProj() &&
+ in->as_Proj()->is_uncommon_trap_if_pattern(Deoptimization::Reason_none)) {
+ if (pf.to(in) * loop_trip_cnt >= 1) { // keep only candidates whose frequency times the trip count reaches 1
+ stack.push(in, 1); // presumably index 1 is past the projection's inputs, so it is emitted when revisited - TODO confirm
+ }
+ in = in->in(0);
+ } else {
+ in = in->in(0);
+ }
+ }
+ } else { // all inputs of c handled
+ if (c->is_IfProj()) {
+ if_proj_list.push(c);
+ }
+ stack.pop();
+ }
+
+ } while (stack.size() > 0);
+}
+
+
+bool PhaseIdealLoop::loop_predication_impl_helper(IdealLoopTree *loop, ProjNode* proj, ProjNode *predicate_proj,
+ CountedLoopNode *cl, ConNode* zero, Invariance& invar,
+ Deoptimization::DeoptReason reason) { // tries to hoist the If above proj as a predicate at predicate_proj using the given deopt reason; returns true if it was hoisted
+ // Set to non-NULL once a predicate has been hoisted
+ ProjNode* new_predicate_proj = NULL;
+ IfNode* iff = proj->in(0)->as_If();
+ Node* test = iff->in(1);
+ if (!test->is_Bool()){ //Conv2B, ...
+ return false;
+ }
+ BoolNode* bol = test->as_Bool();
+ if (invar.is_invariant(bol)) {
+ // Invariant test
+ new_predicate_proj = create_new_if_for_predicate(predicate_proj, NULL,
+ reason,
+ iff->Opcode());
+ Node* ctrl = new_predicate_proj->in(0)->as_If()->in(0);
+ BoolNode* new_predicate_bol = invar.clone(bol, ctrl)->as_Bool();
+
+ // Negate test if necessary
+ bool negated = false;
+ if (proj->_con != predicate_proj->_con) {
+ new_predicate_bol = new BoolNode(new_predicate_bol->in(1), new_predicate_bol->_test.negate());
+ register_new_node(new_predicate_bol, ctrl);
+ negated = true;
+ }
+ IfNode* new_predicate_iff = new_predicate_proj->in(0)->as_If();
+ _igvn.hash_delete(new_predicate_iff);
+ new_predicate_iff->set_req(1, new_predicate_bol); // install the cloned (possibly negated) condition on the new If
+#ifndef PRODUCT
+ if (TraceLoopPredicate) {
+ tty->print("Predicate invariant if%s: %d ", negated ? " negated" : "", new_predicate_iff->_idx);
+ loop->dump_head();
+ } else if (TraceLoopOpts) {
+ tty->print("Predicate IC ");
+ loop->dump_head();
+ }
+#endif
+ } else if (cl != NULL && loop->is_range_check_if(iff, this, invar)) {
+ // Range check for counted loops
+ const Node* cmp = bol->in(1)->as_Cmp();
+ Node* idx = cmp->in(1);
+ assert(!invar.is_invariant(idx), "index is variant");
+ Node* rng = cmp->in(2);
+ assert(rng->Opcode() == Op_LoadRange || iff->is_RangeCheck() || _igvn.type(rng)->is_int()->_lo >= 0, "must be");
+ assert(invar.is_invariant(rng), "range must be invariant");
+ int scale = 1;
+ Node* offset = zero;
+ bool ok = is_scaled_iv_plus_offset(idx, cl->phi(), &scale, &offset);
+ assert(ok, "must be index expression");
+
+ Node* init = cl->init_trip();
+ // Limit is not exact.
+ // Calculate exact limit here.
+ // Note, counted loop's test is '<' or '>'.
+ Node* limit = exact_limit(loop);
+ int stride = cl->stride()->get_int();
+
+ // Build if's for the upper and lower bound tests. The
+ // lower_bound test will dominate the upper bound test and all
+ // cloned or created nodes will use the lower bound test as
+ // their declared control.
+
+ // Perform cloning to keep Invariance state correct since the
+ // late schedule will place invariant things in the loop.
+ Node *ctrl = predicate_proj->in(0)->as_If()->in(0);
+ rng = invar.clone(rng, ctrl);
+ if (offset && offset != zero) {
+ assert(invar.is_invariant(offset), "offset must be loop invariant");
+ offset = invar.clone(offset, ctrl);
+ }
+ // If predicate expressions may overflow in the integer range, longs are used.
+ bool overflow = false;
+
+ // Test the lower bound
+ BoolNode* lower_bound_bol = rc_predicate(loop, ctrl, scale, offset, init, limit, stride, rng, false, overflow);
+ // Negate test if necessary
+ bool negated = false;
+ if (proj->_con != predicate_proj->_con) {
+ lower_bound_bol = new BoolNode(lower_bound_bol->in(1), lower_bound_bol->_test.negate());
+ register_new_node(lower_bound_bol, ctrl);
+ negated = true;
+ }
+ ProjNode* lower_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, reason, overflow ? Op_If : iff->Opcode());
+ IfNode* lower_bound_iff = lower_bound_proj->in(0)->as_If();
+ _igvn.hash_delete(lower_bound_iff);
+ lower_bound_iff->set_req(1, lower_bound_bol);
+ if (TraceLoopPredicate) tty->print_cr("lower bound check if: %s %d ", negated ? " negated" : "", lower_bound_iff->_idx);
+
+ // Test the upper bound
+ BoolNode* upper_bound_bol = rc_predicate(loop, lower_bound_proj, scale, offset, init, limit, stride, rng, true, overflow);
+ negated = false;
+ if (proj->_con != predicate_proj->_con) {
+ upper_bound_bol = new BoolNode(upper_bound_bol->in(1), upper_bound_bol->_test.negate());
+ register_new_node(upper_bound_bol, ctrl);
+ negated = true;
+ }
+ ProjNode* upper_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, reason, overflow ? Op_If : iff->Opcode());
+ assert(upper_bound_proj->in(0)->as_If()->in(0) == lower_bound_proj, "should dominate");
+ IfNode* upper_bound_iff = upper_bound_proj->in(0)->as_If();
+ _igvn.hash_delete(upper_bound_iff);
+ upper_bound_iff->set_req(1, upper_bound_bol);
+ if (TraceLoopPredicate) tty->print_cr("upper bound check if: %s %d ", negated ? " negated" : "", upper_bound_iff->_idx); // report the upper bound If, not the lower bound one
+
+ // Fall through into rest of the clean up code which will move
+ // any dependent nodes onto the upper bound test.
+ new_predicate_proj = upper_bound_proj;
+
+ if (iff->is_RangeCheck()) {
+ new_predicate_proj = insert_skeleton_predicate(iff, loop, proj, predicate_proj, upper_bound_proj, scale, offset, init, limit, stride, rng, overflow, reason);
+ }
+
+#ifndef PRODUCT
+ if (TraceLoopOpts && !TraceLoopPredicate) {
+ tty->print("Predicate RC ");
+ loop->dump_head();
+ }
+#endif
+ } else {
+ // Loop variant check (for example, range check in non-counted loop)
+ // with uncommon trap.
+ return false;
+ }
+ assert(new_predicate_proj != NULL, "sanity");
+ // Success - attach condition (new_predicate_bol) to predicate if
+ invar.map_ctrl(proj, new_predicate_proj); // so that invariance test can be appropriate
+
+ // Eliminate the old If in the loop body
+ dominated_by( new_predicate_proj, iff, proj->_con != new_predicate_proj->_con );
+
+ C->set_major_progress();
+ return true;
+}
+
+
// After pre/main/post loops are created, we'll put a copy of some
// range checks between the pre and main loop to validate the initial
// value of the induction variable for the main loop. Make a copy of
@@ -776,14 +1221,15 @@
ProjNode* upper_bound_proj,
int scale, Node* offset,
Node* init, Node* limit, jint stride,
- Node* rng, bool &overflow) {
+ Node* rng, bool &overflow,
+ Deoptimization::DeoptReason reason) {
assert(proj->_con && predicate_proj->_con, "not a range check?");
Node* opaque_init = new Opaque1Node(C, init);
register_new_node(opaque_init, upper_bound_proj);
BoolNode* bol = rc_predicate(loop, upper_bound_proj, scale, offset, opaque_init, limit, stride, rng, (stride > 0) != (scale > 0), overflow);
Node* opaque_bol = new Opaque4Node(C, bol, _igvn.intcon(1)); // This will go away once loop opts are over
register_new_node(opaque_bol, upper_bound_proj);
- ProjNode* new_proj = create_new_if_for_predicate(predicate_proj, NULL, Deoptimization::Reason_predicate, overflow ? Op_If : iff->Opcode());
+ ProjNode* new_proj = create_new_if_for_predicate(predicate_proj, NULL, reason, overflow ? Op_If : iff->Opcode());
_igvn.replace_input_of(new_proj->in(0), 1, opaque_bol);
assert(opaque_init->outcnt() > 0, "should be used");
return new_proj;
@@ -821,13 +1267,32 @@
}
Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl);
+ ProjNode *loop_limit_proj = NULL;
ProjNode *predicate_proj = NULL;
+ ProjNode *profile_predicate_proj = NULL;
// Loop limit check predicate should be near the loop.
- predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
- if (predicate_proj != NULL)
- entry = predicate_proj->in(0)->in(0);
+ loop_limit_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
+ if (loop_limit_proj != NULL) {
+ entry = loop_limit_proj->in(0)->in(0);
+ }
+ bool has_profile_predicates = false;
+ profile_predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_profile_predicate);
+ if (profile_predicate_proj != NULL) {
+ Node* n = skip_loop_predicates(entry);
+ // Check if predicates were already added to the profile predicate
+ // block
+ if (n != entry->in(0)->in(0)) {
+ has_profile_predicates = true;
+ }
+ entry = n;
+ }
predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
- if (!predicate_proj) {
+
+ float loop_trip_cnt = -1;
+ bool follow_branches = loop_predication_should_follow_branches(loop, profile_predicate_proj, loop_trip_cnt);
+ assert(!follow_branches || loop_trip_cnt >= 0, "negative trip count?");
+
+ if (predicate_proj == NULL && !follow_branches) {
#ifndef PRODUCT
if (TraceLoopPredicate) {
tty->print("missing predicate:");
@@ -846,7 +1311,11 @@
// Create list of if-projs such that a newer proj dominates all older
// projs in the list, and they all dominate loop->tail()
Node_List if_proj_list(area);
+ Node_List regions(area);
Node *current_proj = loop->tail(); //start from tail
+
+
+ Node_List controls(area);
while (current_proj != head) {
if (loop == get_loop(current_proj) && // still in the loop ?
current_proj->is_Proj() && // is a projection ?
@@ -854,161 +1323,79 @@
current_proj->in(0)->Opcode() == Op_RangeCheck)) { // is a if projection ?
if_proj_list.push(current_proj);
}
+ if (follow_branches &&
+ current_proj->Opcode() == Op_Region &&
+ loop == get_loop(current_proj)) {
+ regions.push(current_proj);
+ }
current_proj = idom(current_proj);
}
bool hoisted = false; // true if at least one proj is promoted
- while (if_proj_list.size() > 0) {
- // Following are changed to nonnull when a predicate can be hoisted
- ProjNode* new_predicate_proj = NULL;
- ProjNode* proj = if_proj_list.pop()->as_Proj();
- IfNode* iff = proj->in(0)->as_If();
+ if (!has_profile_predicates) {
+ while (if_proj_list.size() > 0) {
+ Node* n = if_proj_list.pop();
+
+ ProjNode* proj = n->as_Proj();
+ IfNode* iff = proj->in(0)->as_If();
- if (!proj->is_uncommon_trap_if_pattern(Deoptimization::Reason_none)) {
- if (loop->is_loop_exit(iff)) {
- // stop processing the remaining projs in the list because the execution of them
- // depends on the condition of "iff" (iff->in(1)).
+ CallStaticJavaNode* call = proj->is_uncommon_trap_if_pattern(Deoptimization::Reason_none);
+ if (call == NULL) {
+ if (loop->is_loop_exit(iff)) {
+ // stop processing the remaining projs in the list because the execution of them
+ // depends on the condition of "iff" (iff->in(1)).
+ break;
+ } else {
+ // Both arms are inside the loop. There are two cases:
+ // (1) there is one backward branch. In this case, any remaining proj
+ // in the if_proj list post-dominates "iff". So, the condition of "iff"
+ // does not determine the execution of the remaining projs directly, and we
+ // can safely continue.
+ // (2) both arms are forwarded, i.e. a diamond shape. In this case, "proj"
+ // does not dominate loop->tail(), so it can not be in the if_proj list.
+ continue;
+ }
+ }
+ Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(call->uncommon_trap_request());
+ if (reason == Deoptimization::Reason_predicate) {
break;
- } else {
- // Both arms are inside the loop. There are two cases:
- // (1) there is one backward branch. In this case, any remaining proj
- // in the if_proj list post-dominates "iff". So, the condition of "iff"
- // does not determine the execution the remining projs directly, and we
- // can safely continue.
- // (2) both arms are forwarded, i.e. a diamond shape. In this case, "proj"
- // does not dominate loop->tail(), so it can not be in the if_proj list.
- continue;
+ }
+
+ if (predicate_proj != NULL) {
+ hoisted = loop_predication_impl_helper(loop, proj, predicate_proj, cl, zero, invar, Deoptimization::Reason_predicate) | hoisted;
+ }
+ } // end while
+ }
+
+ Node_List if_proj_list_freq(area);
+ if (follow_branches) {
+ PathFrequency pf(loop->_head, this);
+
+ // Some projections were skipped by regular predicates because of
+ // an early loop exit. Try them with profile data.
+ while (if_proj_list.size() > 0) {
+ Node* proj = if_proj_list.pop();
+ float f = pf.to(proj);
+ if (proj->as_Proj()->is_uncommon_trap_if_pattern(Deoptimization::Reason_none) &&
+ f * loop_trip_cnt >= 1) {
+ hoisted = loop_predication_impl_helper(loop, proj->as_Proj(), profile_predicate_proj, cl, zero, invar, Deoptimization::Reason_profile_predicate) | hoisted;
}
}
- Node* test = iff->in(1);
- if (!test->is_Bool()){ //Conv2B, ...
- continue;
+ // And look into all branches
+ Node_Stack stack(0);
+ VectorSet seen(Thread::current()->resource_area());
+ while (regions.size() > 0) {
+ Node* c = regions.pop();
+ loop_predication_follow_branches(c, loop, loop_trip_cnt, pf, stack, seen, if_proj_list_freq);
}
- BoolNode* bol = test->as_Bool();
- if (invar.is_invariant(bol)) {
- // Invariant test
- new_predicate_proj = create_new_if_for_predicate(predicate_proj, NULL,
- Deoptimization::Reason_predicate,
- iff->Opcode());
- Node* ctrl = new_predicate_proj->in(0)->as_If()->in(0);
- BoolNode* new_predicate_bol = invar.clone(bol, ctrl)->as_Bool();
-
- // Negate test if necessary
- bool negated = false;
- if (proj->_con != predicate_proj->_con) {
- new_predicate_bol = new BoolNode(new_predicate_bol->in(1), new_predicate_bol->_test.negate());
- register_new_node(new_predicate_bol, ctrl);
- negated = true;
- }
- IfNode* new_predicate_iff = new_predicate_proj->in(0)->as_If();
- _igvn.hash_delete(new_predicate_iff);
- new_predicate_iff->set_req(1, new_predicate_bol);
-#ifndef PRODUCT
- if (TraceLoopPredicate) {
- tty->print("Predicate invariant if%s: %d ", negated ? " negated" : "", new_predicate_iff->_idx);
- loop->dump_head();
- } else if (TraceLoopOpts) {
- tty->print("Predicate IC ");
- loop->dump_head();
- }
-#endif
- } else if (cl != NULL && loop->is_range_check_if(iff, this, invar)) {
- // Range check for counted loops
- const Node* cmp = bol->in(1)->as_Cmp();
- Node* idx = cmp->in(1);
- assert(!invar.is_invariant(idx), "index is variant");
- Node* rng = cmp->in(2);
- assert(rng->Opcode() == Op_LoadRange || iff->is_RangeCheck() || _igvn.type(rng)->is_int()->_lo >= 0, "must be");
- assert(invar.is_invariant(rng), "range must be invariant");
- int scale = 1;
- Node* offset = zero;
- bool ok = is_scaled_iv_plus_offset(idx, cl->phi(), &scale, &offset);
- assert(ok, "must be index expression");
-
- Node* init = cl->init_trip();
- // Limit is not exact.
- // Calculate exact limit here.
- // Note, counted loop's test is '<' or '>'.
- Node* limit = exact_limit(loop);
- int stride = cl->stride()->get_int();
-
- // Build if's for the upper and lower bound tests. The
- // lower_bound test will dominate the upper bound test and all
- // cloned or created nodes will use the lower bound test as
- // their declared control.
- // Perform cloning to keep Invariance state correct since the
- // late schedule will place invariant things in the loop.
- Node *ctrl = predicate_proj->in(0)->as_If()->in(0);
- rng = invar.clone(rng, ctrl);
- if (offset && offset != zero) {
- assert(invar.is_invariant(offset), "offset must be loop invariant");
- offset = invar.clone(offset, ctrl);
- }
- // If predicate expressions may overflow in the integer range, longs are used.
- bool overflow = false;
-
- // Test the lower bound
- BoolNode* lower_bound_bol = rc_predicate(loop, ctrl, scale, offset, init, limit, stride, rng, false, overflow);
- // Negate test if necessary
- bool negated = false;
- if (proj->_con != predicate_proj->_con) {
- lower_bound_bol = new BoolNode(lower_bound_bol->in(1), lower_bound_bol->_test.negate());
- register_new_node(lower_bound_bol, ctrl);
- negated = true;
- }
- ProjNode* lower_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, Deoptimization::Reason_predicate, overflow ? Op_If : iff->Opcode());
- IfNode* lower_bound_iff = lower_bound_proj->in(0)->as_If();
- _igvn.hash_delete(lower_bound_iff);
- lower_bound_iff->set_req(1, lower_bound_bol);
- if (TraceLoopPredicate) tty->print_cr("lower bound check if: %s %d ", negated ? " negated" : "", lower_bound_iff->_idx);
-
- // Test the upper bound
- BoolNode* upper_bound_bol = rc_predicate(loop, lower_bound_proj, scale, offset, init, limit, stride, rng, true, overflow);
- negated = false;
- if (proj->_con != predicate_proj->_con) {
- upper_bound_bol = new BoolNode(upper_bound_bol->in(1), upper_bound_bol->_test.negate());
- register_new_node(upper_bound_bol, ctrl);
- negated = true;
- }
- ProjNode* upper_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, Deoptimization::Reason_predicate, overflow ? Op_If : iff->Opcode());
- assert(upper_bound_proj->in(0)->as_If()->in(0) == lower_bound_proj, "should dominate");
- IfNode* upper_bound_iff = upper_bound_proj->in(0)->as_If();
- _igvn.hash_delete(upper_bound_iff);
- upper_bound_iff->set_req(1, upper_bound_bol);
- if (TraceLoopPredicate) tty->print_cr("upper bound check if: %s %d ", negated ? " negated" : "", lower_bound_iff->_idx);
-
- // Fall through into rest of the clean up code which will move
- // any dependent nodes onto the upper bound test.
- new_predicate_proj = upper_bound_proj;
-
- if (iff->is_RangeCheck()) {
- new_predicate_proj = insert_skeleton_predicate(iff, loop, proj, predicate_proj, upper_bound_proj, scale, offset, init, limit, stride, rng, overflow);
- }
-
-#ifndef PRODUCT
- if (TraceLoopOpts && !TraceLoopPredicate) {
- tty->print("Predicate RC ");
- loop->dump_head();
- }
-#endif
- } else {
- // Loop variant check (for example, range check in non-counted loop)
- // with uncommon trap.
- continue;
+ for (uint i = 0; i < if_proj_list_freq.size(); i++) {
+ ProjNode* proj = if_proj_list_freq.at(i)->as_Proj();
+ hoisted = loop_predication_impl_helper(loop, proj, profile_predicate_proj, cl, zero, invar, Deoptimization::Reason_profile_predicate) | hoisted;
}
- assert(new_predicate_proj != NULL, "sanity");
- // Success - attach condition (new_predicate_bol) to predicate if
- invar.map_ctrl(proj, new_predicate_proj); // so that invariance test can be appropriate
-
- // Eliminate the old If in the loop body
- dominated_by( new_predicate_proj, iff, proj->_con != new_predicate_proj->_con );
-
- hoisted = true;
- C->set_major_progress();
- } // end while
+ }
#ifndef PRODUCT
// report that the loop predication has been actually performed
--- a/src/hotspot/share/opto/loopTransform.cpp Tue Jun 19 08:44:31 2018 +0200
+++ b/src/hotspot/share/opto/loopTransform.cpp Tue Jun 19 09:08:39 2018 +0200
@@ -137,11 +137,45 @@
//------------------------------compute_profile_trip_cnt----------------------------
// Compute loop trip count from profile data as
// (backedge_count + loop_exit_count) / loop_exit_count
-void IdealLoopTree::compute_profile_trip_cnt( PhaseIdealLoop *phase ) {
- if (!_head->is_CountedLoop()) {
+
+float IdealLoopTree::compute_profile_trip_cnt_helper(Node* n) { // Profiled count of loop exits taken at n (an If or a Jump); 0 when n contributes none
+ if (n->is_If()) {
+ IfNode *iff = n->as_If();
+ if (iff->_fcnt != COUNT_UNKNOWN && iff->_prob != PROB_UNKNOWN) { // both an execution count and a probability are required
+ Node *exit = is_loop_exit(iff);
+ if (exit) {
+ float exit_prob = iff->_prob;
+ if (exit->Opcode() == Op_IfFalse) exit_prob = 1.0 - exit_prob; // complement _prob when the exit is the IfFalse projection
+ if (exit_prob > PROB_MIN) { // exits with probability at or below PROB_MIN are not counted
+ float exit_cnt = iff->_fcnt * exit_prob;
+ return exit_cnt;
+ }
+ }
+ }
+ }
+ if (n->is_Jump()) {
+ JumpNode *jmp = n->as_Jump();
+ if (jmp->_fcnt != COUNT_UNKNOWN) {
+ float* probs = jmp->_probs;
+ float exit_prob = 0;
+ PhaseIdealLoop *phase = _phase; // NOTE(review): 'phase' is never read below; the loop uses _phase directly
+ for (DUIterator_Fast imax, i = jmp->fast_outs(imax); i < imax; i++) {
+ JumpProjNode* u = jmp->fast_out(i)->as_JumpProj();
+ if (!is_member(_phase->get_loop(u))) { // projection's loop is rejected by is_member(): its probability counts as exit
+ exit_prob += probs[u->_con];
+ }
+ }
+ return exit_prob * jmp->_fcnt; // summed exit probability times the jump's count; no PROB_MIN cutoff, unlike the If case
+ }
+ }
+ return 0;
+}
+
+void IdealLoopTree::compute_profile_trip_cnt(PhaseIdealLoop *phase) {
+ if (!_head->is_Loop()) {
return;
}
- CountedLoopNode* head = _head->as_CountedLoop();
+ LoopNode* head = _head->as_Loop();
if (head->profile_trip_cnt() != COUNT_UNKNOWN) {
return; // Already computed
}
@@ -153,7 +187,8 @@
back->in(0) &&
back->in(0)->is_If() &&
back->in(0)->as_If()->_fcnt != COUNT_UNKNOWN &&
- back->in(0)->as_If()->_prob != PROB_UNKNOWN) {
+ back->in(0)->as_If()->_prob != PROB_UNKNOWN &&
+ (back->Opcode() == Op_IfTrue ? 1-back->in(0)->as_If()->_prob : back->in(0)->as_If()->_prob) > PROB_MIN) {
break;
}
back = phase->idom(back);
@@ -162,26 +197,34 @@
assert((back->Opcode() == Op_IfTrue || back->Opcode() == Op_IfFalse) &&
back->in(0), "if-projection exists");
IfNode* back_if = back->in(0)->as_If();
- float loop_back_cnt = back_if->_fcnt * back_if->_prob;
+ float loop_back_cnt = back_if->_fcnt * (back->Opcode() == Op_IfTrue ? back_if->_prob : (1 - back_if->_prob));
// Now compute a loop exit count
float loop_exit_cnt = 0.0f;
- for( uint i = 0; i < _body.size(); i++ ) {
- Node *n = _body[i];
- if( n->is_If() ) {
- IfNode *iff = n->as_If();
- if( iff->_fcnt != COUNT_UNKNOWN && iff->_prob != PROB_UNKNOWN ) {
- Node *exit = is_loop_exit(iff);
- if( exit ) {
- float exit_prob = iff->_prob;
- if (exit->Opcode() == Op_IfFalse) exit_prob = 1.0 - exit_prob;
- if (exit_prob > PROB_MIN) {
- float exit_cnt = iff->_fcnt * exit_prob;
- loop_exit_cnt += exit_cnt;
+ if (_child == NULL) {
+ for( uint i = 0; i < _body.size(); i++ ) {
+ Node *n = _body[i];
+ loop_exit_cnt += compute_profile_trip_cnt_helper(n);
+ }
+ } else {
+ ResourceMark rm;
+ Unique_Node_List wq;
+ wq.push(back);
+ for (uint i = 0; i < wq.size(); i++) {
+ Node *n = wq.at(i);
+ assert(n->is_CFG(), "only control nodes");
+ if (n != head) {
+ if (n->is_Region()) {
+ for (uint j = 1; j < n->req(); j++) {
+ wq.push(n->in(j));
}
+ } else {
+ loop_exit_cnt += compute_profile_trip_cnt_helper(n);
+ wq.push(n->in(0));
}
}
}
+
}
if (loop_exit_cnt > 0.0f) {
trip_cnt = (loop_back_cnt + loop_exit_cnt) / loop_exit_cnt;
@@ -189,6 +232,8 @@
// No exit count so use
trip_cnt = loop_back_cnt;
}
+ } else {
+ head->mark_profile_trip_failed();
}
#ifndef PRODUCT
if (TraceProfileTripCount) {
@@ -1016,9 +1061,120 @@
// the control paths must die too but the range checks were removed by
// predication. The range checks that we add here guarantee that they
// do.
-void PhaseIdealLoop::duplicate_predicates(CountedLoopNode* pre_head, Node* min_taken, Node* castii,
- IdealLoopTree* outer_loop, LoopNode* outer_main_head,
- uint dd_main_head) {
+void PhaseIdealLoop::duplicate_predicates_helper(Node* predicate, Node* castii, IdealLoopTree* outer_loop, // Clone the Opaque4-guarded Ifs found above 'predicate'
+ LoopNode* outer_main_head, uint dd_main_head) { // and chain the clones in front of outer_main_head's entry control
+ if (predicate != NULL) { // nothing to do when no predicate was found
+ IfNode* iff = predicate->in(0)->as_If();
+ ProjNode* uncommon_proj = iff->proj_out(1 - predicate->as_Proj()->_con); // the If's other projection
+ Node* rgn = uncommon_proj->unique_ctrl_out(); // where the other projection leads; used below to recognise related Ifs
+ assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct");
+ assert(iff->in(1)->in(1)->Opcode() == Op_Opaque1, "unexpected predicate shape");
+ predicate = predicate->in(0)->in(0); // step to the control above this If
+ Node* current_proj = outer_main_head->in(LoopNode::EntryControl);
+ Node* prev_proj = current_proj; // control that the next cloned If will hang from
+ while (predicate != NULL && predicate->is_Proj() && predicate->in(0)->is_If()) {
+ uncommon_proj = predicate->in(0)->as_If()->proj_out(1 - predicate->as_Proj()->_con);
+ if (uncommon_proj->unique_ctrl_out() != rgn) // stop at the first If whose other projection leads elsewhere
+ break;
+ iff = predicate->in(0)->as_If();
+ if (iff->in(1)->Opcode() == Op_Opaque4) { // only Ifs whose condition is an Opaque4 are duplicated
+ Node_Stack to_clone(2);
+ to_clone.push(iff->in(1), 1);
+ uint current = C->unique(); // nodes with _idx >= current are treated below as created by this walk
+ Node* result = NULL;
+ // Look for the opaque node to replace with the init value
+ // and clone everything in between. We keep the Opaque4 node
+ // so the duplicated predicates are eliminated once loop
+ // opts are over: they are here only to keep the IR graph
+ // consistent.
+ do {
+ Node* n = to_clone.node();
+ uint i = to_clone.index();
+ Node* m = n->in(i);
+ int op = m->Opcode();
+ if (m->is_Bool() || // node kinds the walk descends through, starting at input 1
+ m->is_Cmp() ||
+ op == Op_AndL ||
+ op == Op_OrL ||
+ op == Op_RShiftL ||
+ op == Op_LShiftL ||
+ op == Op_AddL ||
+ op == Op_AddI ||
+ op == Op_MulL ||
+ op == Op_MulI ||
+ op == Op_SubL ||
+ op == Op_SubI ||
+ op == Op_ConvI2L) {
+ to_clone.push(m, 1);
+ continue;
+ }
+ if (op == Op_Opaque1) {
+ if (n->_idx < current) { // n predates this walk: clone it before changing its input
+ n = n->clone();
+ }
+ n->set_req(i, castii); // the Opaque1 input is replaced by castii
+ register_new_node(n, current_proj);
+ to_clone.set_node(n);
+ }
+ for (;;) { // advance to the next input of the stack top, popping nodes whose inputs are exhausted
+ Node* cur = to_clone.node();
+ uint j = to_clone.index();
+ if (j+1 < cur->req()) {
+ to_clone.set_index(j+1);
+ break;
+ }
+ to_clone.pop();
+ if (to_clone.size() == 0) { // stack empty: cur is the root of the walk
+ result = cur;
+ break;
+ }
+ Node* next = to_clone.node();
+ j = to_clone.index();
+ if (cur->_idx >= current) { // cur is new, so its user must also be a clone that points at it
+ if (next->_idx < current) {
+ next = next->clone();
+ register_new_node(next, current_proj);
+ to_clone.set_node(next);
+ }
+ assert(next->in(j) != cur, "input should have been cloned");
+ next->set_req(j, cur);
+ }
+ }
+ } while (result == NULL);
+ assert(result->_idx >= current, "new node expected");
+
+ Node* proj = predicate->clone();
+ Node* other_proj = uncommon_proj->clone();
+ Node* new_iff = iff->clone();
+ new_iff->set_req(1, result); // the cloned If tests the cloned condition
+ proj->set_req(0, new_iff);
+ other_proj->set_req(0, new_iff);
+ Node *frame = new ParmNode(C->start(), TypeFunc::FramePtr);
+ register_new_node(frame, C->start());
+ // It's impossible for the predicate to fail at runtime. Use
+ // a Halt node.
+ Node* halt = new HaltNode(other_proj, frame);
+ C->root()->add_req(halt);
+ new_iff->set_req(0, prev_proj); // chain the clone below the previously inserted projection
+
+ register_control(new_iff, outer_loop->_parent, prev_proj);
+ register_control(proj, outer_loop->_parent, new_iff);
+ register_control(other_proj, _ltree_root, new_iff);
+ register_control(halt, _ltree_root, other_proj);
+
+ prev_proj = proj;
+ }
+ predicate = predicate->in(0)->in(0); // continue with the control above this If
+ }
+ if (prev_proj != current_proj) { // rewire the loop entry only if at least one If was cloned
+ _igvn.replace_input_of(outer_main_head, LoopNode::EntryControl, prev_proj);
+ set_idom(outer_main_head, prev_proj, dd_main_head);
+ }
+ }
+}
+
+void PhaseIdealLoop::duplicate_predicates(CountedLoopNode* pre_head, Node* castii, IdealLoopTree* outer_loop,
+ LoopNode* outer_main_head, uint dd_main_head) {
if (UseLoopPredicate) {
Node* entry = pre_head->in(LoopNode::EntryControl);
Node* predicate = NULL;
@@ -1026,112 +1182,16 @@
if (predicate != NULL) {
entry = entry->in(0)->in(0);
}
+ Node* profile_predicate = NULL;
+ if (UseProfiledLoopPredicate) {
+ profile_predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_profile_predicate);
+ if (profile_predicate != NULL) {
+ entry = skip_loop_predicates(entry);
+ }
+ }
predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
- if (predicate != NULL) {
- IfNode* iff = entry->in(0)->as_If();
- ProjNode* uncommon_proj = iff->proj_out(1 - entry->as_Proj()->_con);
- Node* rgn = uncommon_proj->unique_ctrl_out();
- assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct");
- assert(iff->in(1)->in(1)->Opcode() == Op_Opaque1, "unexpected predicate shape");
- entry = entry->in(0)->in(0);
- Node* prev_proj = min_taken;
- while (entry != NULL && entry->is_Proj() && entry->in(0)->is_If()) {
- uncommon_proj = entry->in(0)->as_If()->proj_out(1 - entry->as_Proj()->_con);
- if (uncommon_proj->unique_ctrl_out() != rgn)
- break;
- iff = entry->in(0)->as_If();
- if (iff->in(1)->Opcode() == Op_Opaque4) {
- Node_Stack to_clone(2);
- to_clone.push(iff->in(1), 1);
- uint current = C->unique();
- Node* result = NULL;
- // Look for the opaque node to replace with the init value
- // and clone everything in between. We keep the Opaque4 node
- // so the duplicated predicates are eliminated once loop
- // opts are over: they are here only to keep the IR graph
- // consistent.
- do {
- Node* n = to_clone.node();
- uint i = to_clone.index();
- Node* m = n->in(i);
- int op = m->Opcode();
- if (m->is_Bool() ||
- m->is_Cmp() ||
- op == Op_AndL ||
- op == Op_OrL ||
- op == Op_RShiftL ||
- op == Op_LShiftL ||
- op == Op_AddL ||
- op == Op_AddI ||
- op == Op_MulL ||
- op == Op_MulI ||
- op == Op_SubL ||
- op == Op_SubI ||
- op == Op_ConvI2L) {
- to_clone.push(m, 1);
- continue;
- }
- if (op == Op_Opaque1) {
- if (n->_idx < current) {
- n = n->clone();
- }
- n->set_req(i, castii);
- register_new_node(n, min_taken);
- to_clone.set_node(n);
- }
- for (;;) {
- Node* cur = to_clone.node();
- uint j = to_clone.index();
- if (j+1 < cur->req()) {
- to_clone.set_index(j+1);
- break;
- }
- to_clone.pop();
- if (to_clone.size() == 0) {
- result = cur;
- break;
- }
- Node* next = to_clone.node();
- j = to_clone.index();
- if (cur->_idx >= current) {
- if (next->_idx < current) {
- next = next->clone();
- register_new_node(next, min_taken);
- to_clone.set_node(next);
- }
- assert(next->in(j) != cur, "input should have been cloned");
- next->set_req(j, cur);
- }
- }
- } while (result == NULL);
- assert(result->_idx >= current, "new node expected");
-
- Node* proj = entry->clone();
- Node* other_proj = uncommon_proj->clone();
- Node* new_iff = iff->clone();
- new_iff->set_req(1, result);
- proj->set_req(0, new_iff);
- other_proj->set_req(0, new_iff);
- Node *frame = new ParmNode(C->start(), TypeFunc::FramePtr);
- register_new_node(frame, C->start());
- // It's impossible for the predicate to fail at runtime. Use
- // an Halt node.
- Node* halt = new HaltNode(other_proj, frame);
- C->root()->add_req(halt);
- new_iff->set_req(0, prev_proj);
-
- register_control(new_iff, outer_loop->_parent, prev_proj);
- register_control(proj, outer_loop->_parent, new_iff);
- register_control(other_proj, _ltree_root, new_iff);
- register_control(halt, _ltree_root, other_proj);
-
- prev_proj = proj;
- }
- entry = entry->in(0)->in(0);
- }
- _igvn.replace_input_of(outer_main_head, LoopNode::EntryControl, prev_proj);
- set_idom(outer_main_head, prev_proj, dd_main_head);
- }
+ duplicate_predicates_helper(predicate, castii, outer_loop, outer_main_head, dd_main_head);
+ duplicate_predicates_helper(profile_predicate, castii, outer_loop, outer_main_head, dd_main_head);
}
}
@@ -1278,7 +1338,7 @@
// CastII for the main loop:
Node* castii = cast_incr_before_loop( pre_incr, min_taken, main_head );
assert(castii != NULL, "no castII inserted");
- duplicate_predicates(pre_head, min_taken, castii, outer_loop, outer_main_head, dd_main_head);
+ duplicate_predicates(pre_head, castii, outer_loop, outer_main_head, dd_main_head);
// Step B4: Shorten the pre-loop to run only 1 iteration (for now).
// RCE and alignment may change this later.
@@ -2815,7 +2875,7 @@
}
if (needs_guard) {
// Check for an obvious zero trip guard.
- Node* inctrl = PhaseIdealLoop::skip_loop_predicates(cl->skip_predicates());
+ Node* inctrl = PhaseIdealLoop::skip_all_loop_predicates(cl->skip_predicates());
if (inctrl->Opcode() == Op_IfTrue || inctrl->Opcode() == Op_IfFalse) {
bool maybe_swapped = (inctrl->Opcode() == Op_IfFalse);
// The test should look like just the backedge of a CountedLoop
--- a/src/hotspot/share/opto/loopUnswitch.cpp Tue Jun 19 08:44:31 2018 +0200
+++ b/src/hotspot/share/opto/loopUnswitch.cpp Tue Jun 19 09:08:39 2018 +0200
@@ -138,9 +138,19 @@
Node* uniqc = proj_true->unique_ctrl_out();
Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl);
Node* predicate = find_predicate(entry);
+ if (predicate != NULL) {
+ entry = skip_loop_predicates(entry);
+ }
if (predicate != NULL && UseLoopPredicate) {
// We may have two predicates, find first.
- entry = find_predicate(entry->in(0)->in(0));
+ Node* n = find_predicate(entry);
+ if (n != NULL) {
+ predicate = n;
+ entry = skip_loop_predicates(entry);
+ }
+ }
+ if (predicate != NULL && UseProfiledLoopPredicate) {
+ entry = find_predicate(entry);
if (entry != NULL) predicate = entry;
}
if (predicate != NULL) predicate = predicate->in(0);
--- a/src/hotspot/share/opto/loopnode.cpp Tue Jun 19 08:44:31 2018 +0200
+++ b/src/hotspot/share/opto/loopnode.cpp Tue Jun 19 09:08:39 2018 +0200
@@ -1281,9 +1281,7 @@
return l->outer_safepoint();
}
-Node* CountedLoopNode::skip_predicates() {
- if (is_main_loop()) {
- Node* ctrl = skip_strip_mined()->in(LoopNode::EntryControl);
+Node* CountedLoopNode::skip_predicates_from_entry(Node* ctrl) {
while (ctrl != NULL && ctrl->is_Proj() && ctrl->in(0)->is_If() &&
ctrl->in(0)->as_If()->proj_out(1-ctrl->as_Proj()->_con)->outcnt() == 1 &&
ctrl->in(0)->as_If()->proj_out(1-ctrl->as_Proj()->_con)->unique_out()->Opcode() == Op_Halt) {
@@ -1292,6 +1290,13 @@
return ctrl;
}
+
+Node* CountedLoopNode::skip_predicates() { // Control above this loop's entry; for a main loop, the result of skip_predicates_from_entry()
+ if (is_main_loop()) {
+ Node* ctrl = skip_strip_mined()->in(LoopNode::EntryControl); // start from the entry control of skip_strip_mined()'s result
+
+ return skip_predicates_from_entry(ctrl);
+ }
return in(LoopNode::EntryControl);
}
@@ -2400,6 +2405,13 @@
entry = PhaseIdealLoop::find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
if (entry != NULL) {
tty->print(" predicated");
+ entry = PhaseIdealLoop::skip_loop_predicates(entry);
+ }
+ }
+ if (UseProfiledLoopPredicate) {
+ entry = PhaseIdealLoop::find_predicate_insertion_point(entry, Deoptimization::Reason_profile_predicate);
+ if (entry != NULL) {
+ tty->print(" profile_predicated");
}
}
if (_head->is_CountedLoop()) {
@@ -2507,11 +2519,18 @@
if (predicate_proj != NULL ) { // right pattern that can be used by loop predication
assert(entry->in(0)->in(1)->in(1)->Opcode() == Op_Opaque1, "must be");
useful_predicates.push(entry->in(0)->in(1)->in(1)); // good one
- entry = entry->in(0)->in(0);
+ entry = skip_loop_predicates(entry);
}
predicate_proj = find_predicate(entry); // Predicate
if (predicate_proj != NULL ) {
useful_predicates.push(entry->in(0)->in(1)->in(1)); // good one
+ entry = skip_loop_predicates(entry);
+ }
+ if (UseProfiledLoopPredicate) {
+ predicate_proj = find_predicate(entry); // Predicate
+ if (predicate_proj != NULL ) {
+ useful_predicates.push(entry->in(0)->in(1)->in(1)); // good one
+ }
}
}
@@ -4205,12 +4224,33 @@
// which can inhibit range check elimination.
if (least != early) {
Node* ctrl_out = least->unique_ctrl_out();
- if (ctrl_out && ctrl_out->is_Loop() &&
- least == ctrl_out->in(LoopNode::EntryControl) &&
- (ctrl_out->is_CountedLoop() || ctrl_out->is_OuterStripMinedLoop())) {
- Node* least_dom = idom(least);
- if (get_loop(least_dom)->is_member(get_loop(least))) {
- least = least_dom;
+ if (ctrl_out && ctrl_out->is_CountedLoop() &&
+ least == ctrl_out->in(LoopNode::EntryControl)) {
+ Node* new_ctrl = least;
+ // Move the node above predicates so a following pass of loop
+ // predication doesn't hoist a predicate that depends on it
+ // above that node.
+ if (find_predicate_insertion_point(new_ctrl, Deoptimization::Reason_loop_limit_check) != NULL) {
+ new_ctrl = new_ctrl->in(0)->in(0);
+ assert(is_dominator(early, new_ctrl), "least != early so we can move up the dominator tree");
+ }
+ if (find_predicate_insertion_point(new_ctrl, Deoptimization::Reason_profile_predicate) != NULL) {
+ Node* c = new_ctrl->in(0)->in(0);
+ assert(is_dominator(early, c), "least != early so we can move up the dominator tree");
+ new_ctrl = c;
+ }
+ if (find_predicate_insertion_point(new_ctrl, Deoptimization::Reason_predicate) != NULL) {
+ Node* c = new_ctrl->in(0)->in(0);
+ assert(is_dominator(early, c), "least != early so we can move up the dominator tree");
+ new_ctrl = c;
+ }
+ if (new_ctrl != ctrl_out) {
+ least = new_ctrl;
+ } else if (ctrl_out->is_CountedLoop() || ctrl_out->is_OuterStripMinedLoop()) {
+ Node* least_dom = idom(least);
+ if (get_loop(least_dom)->is_member(get_loop(least))) {
+ least = least_dom;
+ }
}
}
}
--- a/src/hotspot/share/opto/loopnode.hpp Tue Jun 19 08:44:31 2018 +0200
+++ b/src/hotspot/share/opto/loopnode.hpp Tue Jun 19 09:08:39 2018 +0200
@@ -38,6 +38,7 @@
class LoopNode;
class Node;
class OuterStripMinedLoopEndNode;
+class PathFrequency;
class PhaseIdealLoop;
class CountedLoopReserveKit;
class VectorSet;
@@ -57,7 +58,7 @@
// the semantics so it does not appear in the hash & cmp functions.
virtual uint size_of() const { return sizeof(*this); }
protected:
- short _loop_flags;
+ uint _loop_flags;
// Names for flag bitfields
enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3,
MainHasNoPreLoop=4,
@@ -73,26 +74,31 @@
HasAtomicPostLoop=4096,
HasRangeChecks=8192,
IsMultiversioned=16384,
- StripMined=32768};
+ StripMined=32768,
+ ProfileTripFailed=65536};
char _unswitch_count;
enum { _unswitch_max=3 };
char _postloop_flags;
enum { LoopNotRCEChecked = 0, LoopRCEChecked = 1, RCEPostLoop = 2 };
+ // Expected trip count from profile data
+ float _profile_trip_cnt;
+
public:
// Names for edge indices
enum { Self=0, EntryControl, LoopBackControl };
- int is_inner_loop() const { return _loop_flags & InnerLoop; }
+ bool is_inner_loop() const { return _loop_flags & InnerLoop; }
void set_inner_loop() { _loop_flags |= InnerLoop; }
- int range_checks_present() const { return _loop_flags & HasRangeChecks; }
- int is_multiversioned() const { return _loop_flags & IsMultiversioned; }
- int is_vectorized_loop() const { return _loop_flags & VectorizedLoop; }
- int is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
+ bool range_checks_present() const { return _loop_flags & HasRangeChecks; }
+ bool is_multiversioned() const { return _loop_flags & IsMultiversioned; }
+ bool is_vectorized_loop() const { return _loop_flags & VectorizedLoop; }
+ bool is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; }
- int partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; }
- int is_strip_mined() const { return _loop_flags & StripMined; }
+ bool partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; }
+ bool is_strip_mined() const { return _loop_flags & StripMined; }
+ bool is_profile_trip_failed() const { return _loop_flags & ProfileTripFailed; }
void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }
void mark_has_reductions() { _loop_flags |= HasReductions; }
@@ -105,6 +111,7 @@
void mark_is_multiversioned() { _loop_flags |= IsMultiversioned; }
void mark_strip_mined() { _loop_flags |= StripMined; }
void clear_strip_mined() { _loop_flags &= ~StripMined; }
+ void mark_profile_trip_failed() { _loop_flags |= ProfileTripFailed; }
int unswitch_max() { return _unswitch_max; }
int unswitch_count() { return _unswitch_count; }
@@ -119,7 +126,12 @@
_unswitch_count = val;
}
- LoopNode(Node *entry, Node *backedge) : RegionNode(3), _loop_flags(0), _unswitch_count(0), _postloop_flags(0) {
+ void set_profile_trip_cnt(float ptc) { _profile_trip_cnt = ptc; }
+ float profile_trip_cnt() { return _profile_trip_cnt; }
+
+ LoopNode(Node *entry, Node *backedge)
+ : RegionNode(3), _loop_flags(0), _unswitch_count(0),
+ _postloop_flags(0), _profile_trip_cnt(COUNT_UNKNOWN) {
init_class_id(Class_Loop);
init_req(EntryControl, entry);
init_req(LoopBackControl, backedge);
@@ -186,9 +198,6 @@
// Known trip count calculated by compute_exact_trip_count()
uint _trip_count;
- // Expected trip count from profile data
- float _profile_trip_cnt;
-
// Log2 of original loop bodies in unrolled loop
int _unrolled_count_log2;
@@ -203,8 +212,8 @@
public:
CountedLoopNode( Node *entry, Node *backedge )
: LoopNode(entry, backedge), _main_idx(0), _trip_count(max_juint),
- _profile_trip_cnt(COUNT_UNKNOWN), _unrolled_count_log2(0),
- _node_count_before_unroll(0), _slp_maximum_unroll_factor(0) {
+ _unrolled_count_log2(0), _node_count_before_unroll(0),
+ _slp_maximum_unroll_factor(0) {
init_class_id(Class_CountedLoop);
// Initialize _trip_count to the largest possible value.
// Will be reset (lower) if the loop's trip count is known.
@@ -245,16 +254,16 @@
// A 'main' loop that is ONLY unrolled or peeled, never RCE'd or
// Aligned, may be missing it's pre-loop.
- int is_normal_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Normal; }
- int is_pre_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Pre; }
- int is_main_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Main; }
- int is_post_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Post; }
- int is_reduction_loop() const { return (_loop_flags&HasReductions) == HasReductions; }
- int was_slp_analyzed () const { return (_loop_flags&WasSlpAnalyzed) == WasSlpAnalyzed; }
- int has_passed_slp () const { return (_loop_flags&PassedSlpAnalysis) == PassedSlpAnalysis; }
- int do_unroll_only () const { return (_loop_flags&DoUnrollOnly) == DoUnrollOnly; }
- int is_main_no_pre_loop() const { return _loop_flags & MainHasNoPreLoop; }
- int has_atomic_post_loop () const { return (_loop_flags & HasAtomicPostLoop) == HasAtomicPostLoop; }
+ bool is_normal_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Normal; }
+ bool is_pre_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Pre; }
+ bool is_main_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Main; }
+ bool is_post_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Post; }
+ bool is_reduction_loop() const { return (_loop_flags&HasReductions) == HasReductions; }
+ bool was_slp_analyzed () const { return (_loop_flags&WasSlpAnalyzed) == WasSlpAnalyzed; }
+ bool has_passed_slp () const { return (_loop_flags&PassedSlpAnalysis) == PassedSlpAnalysis; }
+ bool do_unroll_only () const { return (_loop_flags&DoUnrollOnly) == DoUnrollOnly; }
+ bool is_main_no_pre_loop() const { return _loop_flags & MainHasNoPreLoop; }
+ bool has_atomic_post_loop () const { return (_loop_flags & HasAtomicPostLoop) == HasAtomicPostLoop; }
void set_main_no_pre_loop() { _loop_flags |= MainHasNoPreLoop; }
int main_idx() const { return _main_idx; }
@@ -280,9 +289,6 @@
_loop_flags &= ~PassedSlpAnalysis;
}
- void set_profile_trip_cnt(float ptc) { _profile_trip_cnt = ptc; }
- float profile_trip_cnt() { return _profile_trip_cnt; }
-
void double_unrolled_count() { _unrolled_count_log2++; }
int unrolled_count() { return 1 << MIN2(_unrolled_count_log2, BitsPerInt-3); }
@@ -301,6 +307,7 @@
// If this is a main loop in a pre/main/post loop nest, walk over
// the predicates that were inserted by
// duplicate_predicates()/add_range_check_predicate()
+ static Node* skip_predicates_from_entry(Node* ctrl);
Node* skip_predicates();
#ifndef PRODUCT
@@ -588,6 +595,7 @@
void compute_trip_count(PhaseIdealLoop* phase);
// Compute loop trip count from profile data
+ float compute_profile_trip_cnt_helper(Node* n);
void compute_profile_trip_cnt( PhaseIdealLoop *phase );
// Reassociate invariant expressions.
@@ -732,9 +740,10 @@
}
Node* cast_incr_before_loop(Node* incr, Node* ctrl, Node* loop);
- void duplicate_predicates(CountedLoopNode* pre_head, Node *min_taken, Node* castii,
- IdealLoopTree* outer_loop, LoopNode* outer_main_head,
- uint dd_main_head);
+ void duplicate_predicates_helper(Node* predicate, Node* castii, IdealLoopTree* outer_loop,
+ LoopNode* outer_main_head, uint dd_main_head);
+ void duplicate_predicates(CountedLoopNode* pre_head, Node* castii, IdealLoopTree* outer_loop,
+ LoopNode* outer_main_head, uint dd_main_head);
public:
@@ -1073,6 +1082,7 @@
PhaseIterGVN* igvn);
Node* clone_loop_predicates(Node* old_entry, Node* new_entry, bool clone_limit_check);
+ static Node* skip_all_loop_predicates(Node* entry);
static Node* skip_loop_predicates(Node* entry);
// Find a good location to insert a predicate
@@ -1087,12 +1097,20 @@
// Implementation of the loop predication to promote checks outside the loop
bool loop_predication_impl(IdealLoopTree *loop);
+ bool loop_predication_impl_helper(IdealLoopTree *loop, ProjNode* proj, ProjNode *predicate_proj,
+ CountedLoopNode *cl, ConNode* zero, Invariance& invar,
+ Deoptimization::DeoptReason reason);
+ bool loop_predication_should_follow_branches(IdealLoopTree *loop, ProjNode *predicate_proj, float& loop_trip_cnt);
+ void loop_predication_follow_branches(Node *c, IdealLoopTree *loop, float loop_trip_cnt,
+ PathFrequency& pf, Node_Stack& stack, VectorSet& seen,
+ Node_List& if_proj_list);
ProjNode* insert_skeleton_predicate(IfNode* iff, IdealLoopTree *loop,
ProjNode* proj, ProjNode *predicate_proj,
ProjNode* upper_bound_proj,
int scale, Node* offset,
Node* init, Node* limit, jint stride,
- Node* rng, bool& overflow);
+ Node* rng, bool& overflow,
+ Deoptimization::DeoptReason reason);
Node* add_range_check_predicate(IdealLoopTree* loop, CountedLoopNode* cl,
Node* predicate_proj, int scale_con, Node* offset,
Node* limit, jint stride_con);
--- a/src/hotspot/share/opto/node.hpp Tue Jun 19 08:44:31 2018 +0200
+++ b/src/hotspot/share/opto/node.hpp Tue Jun 19 09:08:39 2018 +0200
@@ -73,6 +73,7 @@
class FastLockNode;
class FastUnlockNode;
class IfNode;
+class IfProjNode;
class IfFalseNode;
class IfTrueNode;
class InitializeNode;
@@ -676,8 +677,9 @@
DEFINE_CLASS_ID(Proj, Node, 3)
DEFINE_CLASS_ID(CatchProj, Proj, 0)
DEFINE_CLASS_ID(JumpProj, Proj, 1)
- DEFINE_CLASS_ID(IfTrue, Proj, 2)
- DEFINE_CLASS_ID(IfFalse, Proj, 3)
+ DEFINE_CLASS_ID(IfProj, Proj, 2)
+ DEFINE_CLASS_ID(IfTrue, IfProj, 0)
+ DEFINE_CLASS_ID(IfFalse, IfProj, 1)
DEFINE_CLASS_ID(Parm, Proj, 4)
DEFINE_CLASS_ID(MachProj, Proj, 5)
@@ -818,6 +820,7 @@
DEFINE_CLASS_QUERY(FastUnlock)
DEFINE_CLASS_QUERY(If)
DEFINE_CLASS_QUERY(RangeCheck)
+ DEFINE_CLASS_QUERY(IfProj)
DEFINE_CLASS_QUERY(IfFalse)
DEFINE_CLASS_QUERY(IfTrue)
DEFINE_CLASS_QUERY(Initialize)
--- a/src/hotspot/share/opto/parse.hpp Tue Jun 19 08:44:31 2018 +0200
+++ b/src/hotspot/share/opto/parse.hpp Tue Jun 19 09:08:39 2018 +0200
@@ -161,6 +161,7 @@
bool _has_merged_backedge; // does this block have merged backedge?
SafePointNode* _start_map; // all values flowing into this block
MethodLivenessResult _live_locals; // lazily initialized liveness bitmap
+ bool _has_predicates; // Were predicates added before parsing of the loop head?
int _num_successors; // Includes only normal control flow.
int _all_successors; // Include exception paths also.
@@ -203,6 +204,9 @@
// True when all non-exception predecessors have been parsed.
bool is_ready() const { return preds_parsed() == pred_count(); }
+ bool has_predicates() const { return _has_predicates; }
+ void set_has_predicates() { _has_predicates = true; }
+
int num_successors() const { return _num_successors; }
int all_successors() const { return _all_successors; }
Block* successor_at(int i) const {
@@ -552,6 +556,7 @@
void sharpen_type_after_if(BoolTest::mask btest,
Node* con, const Type* tcon,
Node* val, const Type* tval);
+ void maybe_add_predicate_after_if(Block* path);
IfNode* jump_if_fork_int(Node* a, Node* b, BoolTest::mask mask, float prob, float cnt);
Node* jump_if_join(Node* iffalse, Node* iftrue);
void jump_if_true_fork(IfNode *ifNode, int dest_bci_if_true, int prof_table_index, bool unc);
--- a/src/hotspot/share/opto/parse1.cpp Tue Jun 19 08:44:31 2018 +0200
+++ b/src/hotspot/share/opto/parse1.cpp Tue Jun 19 09:08:39 2018 +0200
@@ -666,10 +666,13 @@
if (block->is_SEL_head()) {
// Add predicate to single entry (not irreducible) loop head.
assert(!block->has_merged_backedge(), "only entry paths should be merged for now");
- // Need correct bci for predicate.
- // It is fine to set it here since do_one_block() will set it anyway.
- set_parse_bci(block->start());
- add_predicate();
+ // Predicates may have been added after a dominating if
+ if (!block->has_predicates()) {
+ // Need correct bci for predicate.
+ // It is fine to set it here since do_one_block() will set it anyway.
+ set_parse_bci(block->start());
+ add_predicate();
+ }
// Add new region for back branches.
int edges = block->pred_count() - block->preds_parsed() + 1; // +1 for original region
RegionNode *r = new RegionNode(edges+1);
@@ -1262,6 +1265,7 @@
_is_handler = false;
_has_merged_backedge = false;
_start_map = NULL;
+ _has_predicates = false;
_num_successors = 0;
_all_successors = 0;
_successors = NULL;
--- a/src/hotspot/share/opto/parse2.cpp Tue Jun 19 08:44:31 2018 +0200
+++ b/src/hotspot/share/opto/parse2.cpp Tue Jun 19 09:08:39 2018 +0200
@@ -1649,6 +1649,18 @@
return (seems_never_taken(prob) && seems_stable_comparison());
}
+void Parse::maybe_add_predicate_after_if(Block* path) { // Add loop predicates at the current if when 'path' is a loop head with no parsed predecessor
+ if (path->is_SEL_head() && path->preds_parsed() == 0) { // is_SEL_head(): single entry loop head; no predecessor parsed yet
+ // Add predicates at bci of if dominating the loop so traps can be
+ // recorded on the if's profile data
+ int bc_depth = repush_if_args();
+ add_predicate();
+ dec_sp(bc_depth); // presumably undoes the stack adjustment made by repush_if_args() - confirm
+ path->set_has_predicates(); // recorded so the loop-head code that checks has_predicates() skips add_predicate()
+ }
+}
+
+
//----------------------------adjust_map_after_if------------------------------
// Adjust the JVM state to reflect the result of taking this path.
// Basically, it means inspecting the CmpNode controlling this
@@ -1657,8 +1669,14 @@
// as graph nodes in the current abstract interpretation map.
void Parse::adjust_map_after_if(BoolTest::mask btest, Node* c, float prob,
Block* path, Block* other_path) {
- if (stopped() || !c->is_Cmp() || btest == BoolTest::illegal)
+ if (!c->is_Cmp()) {
+ maybe_add_predicate_after_if(path);
+ return;
+ }
+
+ if (stopped() || btest == BoolTest::illegal) {
return; // nothing to do
+ }
bool is_fallthrough = (path == successor_for_bci(iter().next_bci()));
@@ -1690,10 +1708,13 @@
have_con = false;
}
}
- if (!have_con) // remaining adjustments need a con
+ if (!have_con) { // remaining adjustments need a con
+ maybe_add_predicate_after_if(path);
return;
+ }
sharpen_type_after_if(btest, con, tcon, val, tval);
+ maybe_add_predicate_after_if(path);
}
--- a/src/hotspot/share/runtime/deoptimization.cpp Tue Jun 19 08:44:31 2018 +0200
+++ b/src/hotspot/share/runtime/deoptimization.cpp Tue Jun 19 09:08:39 2018 +0200
@@ -2170,6 +2170,7 @@
"array_check",
"intrinsic" JVMCI_ONLY("_or_type_checked_inlining"),
"bimorphic" JVMCI_ONLY("_or_optimized_type_check"),
+ "profile_predicate",
"unloaded",
"uninitialized",
"unreached",
--- a/src/hotspot/share/runtime/deoptimization.hpp Tue Jun 19 08:44:31 2018 +0200
+++ b/src/hotspot/share/runtime/deoptimization.hpp Tue Jun 19 09:08:39 2018 +0200
@@ -41,7 +41,7 @@
enum DeoptReason {
Reason_many = -1, // indicates presence of several reasons
Reason_none = 0, // indicates absence of a relevant deopt.
- // Next 7 reasons are recorded per bytecode in DataLayout::trap_bits.
+ // Next 8 reasons are recorded per bytecode in DataLayout::trap_bits.
// This is more complicated for JVMCI as JVMCI may deoptimize to *some* bytecode before the
// bytecode that actually caused the deopt (with inlining, JVMCI may even deoptimize to a
// bytecode in another method):
@@ -62,6 +62,8 @@
Reason_optimized_type_check = Reason_bimorphic,
#endif
+ Reason_profile_predicate, // compiler generated predicate moved from frequent branch in a loop failed
+
// recorded per method
Reason_unloaded, // unloaded class or constant pool entry
Reason_uninitialized, // bad class state (uninitialized)
@@ -92,8 +94,8 @@
Reason_LIMIT,
// Note: Keep this enum in sync. with _trap_reason_name.
- Reason_RECORDED_LIMIT = Reason_bimorphic // some are not recorded per bc
- // Note: Reason_RECORDED_LIMIT should be < 8 to fit into 3 bits of
+ Reason_RECORDED_LIMIT = Reason_profile_predicate // some are not recorded per bc
+ // Note: Reason_RECORDED_LIMIT should fit into 31 bits of
// DataLayout::trap_bits. This dependency is enforced indirectly
// via asserts, to avoid excessive direct header-to-header dependencies.
// See Deoptimization::trap_state_reason and class DataLayout.
--- a/src/hotspot/share/runtime/vmStructs.cpp Tue Jun 19 08:44:31 2018 +0200
+++ b/src/hotspot/share/runtime/vmStructs.cpp Tue Jun 19 09:08:39 2018 +0200
@@ -2381,6 +2381,7 @@
declare_constant(Deoptimization::Reason_array_check) \
declare_constant(Deoptimization::Reason_intrinsic) \
declare_constant(Deoptimization::Reason_bimorphic) \
+ declare_constant(Deoptimization::Reason_profile_predicate) \
declare_constant(Deoptimization::Reason_unloaded) \
declare_constant(Deoptimization::Reason_uninitialized) \
declare_constant(Deoptimization::Reason_unreached) \