7197327: 40% regression on 8 b41 comp 8 b40 on specjvm2008.mpegaudio on oob
Summary: Add support for expensive nodes.
Reviewed-by: kvn
--- a/hotspot/src/share/vm/opto/c2_globals.hpp Mon Feb 11 14:47:04 2013 -0800
+++ b/hotspot/src/share/vm/opto/c2_globals.hpp Tue Feb 12 12:56:11 2013 +0100
@@ -618,6 +618,9 @@
\
product(intx, LiveNodeCountInliningCutoff, 20000, \
"max number of live nodes in a method") \
+ \
+ diagnostic(bool, OptimizeExpensiveOps, true, \
+ "Find best control for expensive operations") \
C2_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG)
--- a/hotspot/src/share/vm/opto/compile.cpp Mon Feb 11 14:47:04 2013 -0800
+++ b/hotspot/src/share/vm/opto/compile.cpp Tue Feb 12 12:56:11 2013 +0100
@@ -409,6 +409,13 @@
remove_macro_node(n);
}
}
+ // Remove useless expensive node
+ for (int i = C->expensive_count()-1; i >= 0; i--) {
+ Node* n = C->expensive_node(i);
+ if (!useful.member(n)) {
+ remove_expensive_node(n);
+ }
+ }
// clean up the late inline lists
remove_useless_late_inlines(&_string_late_inlines, useful);
remove_useless_late_inlines(&_late_inlines, useful);
@@ -1061,6 +1068,7 @@
_intrinsics = NULL;
_macro_nodes = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8, 0, NULL);
_predicate_opaqs = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8, 0, NULL);
+ _expensive_nodes = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8, 0, NULL);
register_library_intrinsics();
}
@@ -1927,6 +1935,10 @@
if (failing()) return;
+ // No more new expensive nodes will be added to the list from here
+ // so keep only the actual candidates for optimizations.
+ cleanup_expensive_nodes(igvn);
+
// Perform escape analysis
if (_do_escape_analysis && ConnectionGraph::has_candidates(this)) {
if (has_loops()) {
@@ -3010,6 +3022,15 @@
return true;
}
+ // Expensive nodes have their control input set to prevent the GVN
+ // from freely commoning them. There's no GVN beyond this point so
+ // no need to keep the control input. We want the expensive nodes to
+ // be freely moved to the least frequent code path by gcm.
+ assert(OptimizeExpensiveOps || expensive_count() == 0, "optimization off but list non empty?");
+ for (int i = 0; i < expensive_count(); i++) {
+ _expensive_nodes->at(i)->set_req(0, NULL);
+ }
+
Final_Reshape_Counts frc;
// Visit everybody reachable!
@@ -3525,3 +3546,126 @@
}
}
}
+
+int Compile::cmp_expensive_nodes(Node* n1, Node* n2) { // Three-way compare: opcode first, then data inputs (input 0, the control, is ignored)
+ if (n1->Opcode() < n2->Opcode()) return -1;
+ else if (n1->Opcode() > n2->Opcode()) return 1;
+
+ assert(n1->req() == n2->req(), err_msg_res("can't compare %s nodes: n1->req() = %d, n2->req() = %d", NodeClassNames[n1->Opcode()], n1->req(), n2->req()));
+ for (uint i = 1; i < n1->req(); i++) { // start at 1: input 0 is not part of the comparison
+ if (n1->in(i) < n2->in(i)) return -1; // inputs are ordered by pointer value
+ else if (n1->in(i) > n2->in(i)) return 1;
+ }
+
+ return 0; // same opcode and identical inputs 1..req()-1
+}
+
+int Compile::cmp_expensive_nodes(Node** n1p, Node** n2p) { // Element-address overload: dereferences both slots and defers to the Node* version
+ Node* n1 = *n1p;
+ Node* n2 = *n2p;
+
+ return cmp_expensive_nodes(n1, n2);
+}
+
+void Compile::sort_expensive_nodes() { // Order the expensive node list with cmp_expensive_nodes
+ if (!expensive_nodes_sorted()) { // skip the sort when the list is already in order
+ _expensive_nodes->sort(cmp_expensive_nodes);
+ }
+}
+
+bool Compile::expensive_nodes_sorted() const { // True if no entry compares less than its predecessor
+ for (int i = 1; i < _expensive_nodes->length(); i++) {
+ if (cmp_expensive_nodes(_expensive_nodes->adr_at(i), _expensive_nodes->adr_at(i-1)) < 0) {
+ return false;
+ }
+ }
+ return true; // also the result when the list has fewer than two entries
+}
+
+bool Compile::should_optimize_expensive_nodes(PhaseIterGVN &igvn) { // Prunes and sorts the list; true if two adjacent entries compare equal
+ if (_expensive_nodes->length() == 0) {
+ return false;
+ }
+
+ assert(OptimizeExpensiveOps, "optimization off?");
+
+ // Take this opportunity to drop unreachable nodes, compacting the list in place
+ int j = 0; // next write position for a surviving entry
+ for (int i = 0; i < _expensive_nodes->length(); i++) {
+ Node* n = _expensive_nodes->at(i);
+ if (!n->is_unreachable(igvn)) {
+ assert(n->is_expensive(), "should be expensive");
+ _expensive_nodes->at_put(j, n);
+ j++;
+ }
+ }
+ _expensive_nodes->trunc_to(j);
+
+ // Then sort the list so that similar nodes are next to each other
+ // and check for at least two nodes with the same opcode and the
+ // same data inputs (cmp_expensive_nodes() == 0).
+ sort_expensive_nodes();
+
+ for (int i = 0; i < _expensive_nodes->length()-1; i++) {
+ if (cmp_expensive_nodes(_expensive_nodes->adr_at(i), _expensive_nodes->adr_at(i+1)) == 0) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void Compile::cleanup_expensive_nodes(PhaseIterGVN &igvn) { // Keep only entries whose opcode is shared with another entry
+ if (_expensive_nodes->length() == 0) {
+ return;
+ }
+
+ assert(OptimizeExpensiveOps, "optimization off?");
+
+ // Sort to bring similar nodes next to each other, then clear the
+ // control input of nodes whose opcode occurs only once in the list.
+ sort_expensive_nodes();
+
+ int j = 0; // next write position of the compacted list
+ int identical = 0; // > 0 while inside a run of entries with the same opcode
+ int i = 0;
+ for (; i < _expensive_nodes->length()-1; i++) {
+ assert(j <= i, "can't write beyond current index");
+ if (_expensive_nodes->at(i)->Opcode() == _expensive_nodes->at(i+1)->Opcode()) { // only opcodes are compared here, not inputs
+ identical++;
+ _expensive_nodes->at_put(j++, _expensive_nodes->at(i));
+ continue;
+ }
+ if (identical > 0) { // last entry of a run: keep it and reset the run counter
+ _expensive_nodes->at_put(j++, _expensive_nodes->at(i));
+ identical = 0;
+ } else { // opcode seen once: drop the entry and clear its control input
+ Node* n = _expensive_nodes->at(i);
+ igvn.hash_delete(n);
+ n->set_req(0, NULL);
+ igvn.hash_insert(n);
+ }
+ }
+ if (identical > 0) { // the final entry (index i) is handled like the loop body handles the others
+ _expensive_nodes->at_put(j++, _expensive_nodes->at(i));
+ } else if (_expensive_nodes->length() >= 1) {
+ Node* n = _expensive_nodes->at(i);
+ igvn.hash_delete(n);
+ n->set_req(0, NULL);
+ igvn.hash_insert(n);
+ }
+ _expensive_nodes->trunc_to(j);
+}
+
+void Compile::add_expensive_node(Node * n) { // Append n to the expensive list, or clear its control input when the flag is off
+ assert(!_expensive_nodes->contains(n), "duplicate entry in expensive list");
+ assert(n->is_expensive(), "expensive nodes with non-null control here only");
+ assert(!n->is_CFG() && !n->is_Mem(), "no cfg or memory nodes here");
+ if (OptimizeExpensiveOps) {
+ _expensive_nodes->append(n);
+ } else {
+ // Clear the control input and let IGVN optimize expensive nodes
+ // freely when OptimizeExpensiveOps is off.
+ n->set_req(0, NULL);
+ }
+}
--- a/hotspot/src/share/vm/opto/compile.hpp Mon Feb 11 14:47:04 2013 -0800
+++ b/hotspot/src/share/vm/opto/compile.hpp Tue Feb 12 12:56:11 2013 +0100
@@ -314,6 +314,7 @@
GrowableArray<CallGenerator*>* _intrinsics; // List of intrinsics.
GrowableArray<Node*>* _macro_nodes; // List of nodes which need to be expanded before matching.
GrowableArray<Node*>* _predicate_opaqs; // List of Opaque1 nodes for the loop predicates.
+ GrowableArray<Node*>* _expensive_nodes; // List of nodes that are expensive to compute and that we'd better not let the GVN freely common
ConnectionGraph* _congraph;
#ifndef PRODUCT
IdealGraphPrinter* _printer;
@@ -398,6 +399,13 @@
GrowableArray<PrintInliningBuffer>* _print_inlining_list;
int _print_inlining;
+ // Only keep nodes in the expensive node list that need to be optimized
+ void cleanup_expensive_nodes(PhaseIterGVN &igvn);
+ // Use for sorting expensive nodes to bring similar nodes together
+ static int cmp_expensive_nodes(Node** n1, Node** n2);
+ // Expensive nodes list already sorted?
+ bool expensive_nodes_sorted() const;
+
public:
outputStream* print_inlining_stream() const {
@@ -573,8 +581,10 @@
int macro_count() { return _macro_nodes->length(); }
int predicate_count() { return _predicate_opaqs->length();}
+ int expensive_count() { return _expensive_nodes->length(); }
Node* macro_node(int idx) { return _macro_nodes->at(idx); }
Node* predicate_opaque1_node(int idx) { return _predicate_opaqs->at(idx);}
+ Node* expensive_node(int idx) { return _expensive_nodes->at(idx); }
ConnectionGraph* congraph() { return _congraph;}
void set_congraph(ConnectionGraph* congraph) { _congraph = congraph;}
void add_macro_node(Node * n) {
@@ -592,6 +602,12 @@
_predicate_opaqs->remove(n);
}
}
+ void add_expensive_node(Node * n);
+ void remove_expensive_node(Node * n) { // no-op when n is not in the expensive list
+ if (_expensive_nodes->contains(n)) {
+ _expensive_nodes->remove(n);
+ }
+ }
void add_predicate_opaq(Node * n) {
assert(!_predicate_opaqs->contains(n), " duplicate entry in predicate opaque1");
assert(_macro_nodes->contains(n), "should have already been in macro list");
@@ -604,6 +620,13 @@
return _predicate_opaqs->contains(n);
}
+ // Are there candidate expensive nodes for optimization?
+ bool should_optimize_expensive_nodes(PhaseIterGVN &igvn);
+ // Check whether n1 and n2 are similar
+ static int cmp_expensive_nodes(Node* n1, Node* n2);
+ // Sort expensive nodes to locate similar expensive nodes
+ void sort_expensive_nodes();
+
// Compilation environment.
Arena* comp_arena() { return &_comp_arena; }
ciEnv* env() const { return _env; }
--- a/hotspot/src/share/vm/opto/library_call.cpp Mon Feb 11 14:47:04 2013 -0800
+++ b/hotspot/src/share/vm/opto/library_call.cpp Tue Feb 12 12:56:11 2013 +0100
@@ -1653,7 +1653,7 @@
// really odd corner cases (+/- Infinity). Just uncommon-trap them.
bool LibraryCallKit::inline_exp() {
Node* arg = round_double_node(argument(0));
- Node* n = _gvn.transform(new (C) ExpDNode(0, arg));
+ Node* n = _gvn.transform(new (C) ExpDNode(C, control(), arg));
finish_pow_exp(n, arg, NULL, OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP");
@@ -1688,7 +1688,7 @@
if (!too_many_traps(Deoptimization::Reason_intrinsic)) {
// Short form: skip the fancy tests and just check for NaN result.
- result = _gvn.transform(new (C) PowDNode(0, x, y));
+ result = _gvn.transform(new (C) PowDNode(C, control(), x, y));
} else {
// If this inlining ever returned NaN in the past, include all
// checks + call to the runtime.
@@ -1715,7 +1715,7 @@
Node *complex_path = _gvn.transform( new (C) IfTrueNode(if1) );
// Set fast path result
- Node *fast_result = _gvn.transform( new (C) PowDNode(0, x, y) );
+ Node *fast_result = _gvn.transform( new (C) PowDNode(C, control(), x, y) );
phi->init_req(3, fast_result);
// Complex path
@@ -1775,7 +1775,7 @@
// abs(x)
Node *absx=_gvn.transform( new (C) AbsDNode(x));
// abs(x)^y
- Node *absxpowy = _gvn.transform( new (C) PowDNode(0, absx, y) );
+ Node *absxpowy = _gvn.transform( new (C) PowDNode(C, control(), absx, y) );
// -abs(x)^y
Node *negabsxpowy = _gvn.transform(new (C) NegDNode (absxpowy));
// (1&(long)y)==1?-DPow(abs(x), y):DPow(abs(x), y)
--- a/hotspot/src/share/vm/opto/loopnode.cpp Mon Feb 11 14:47:04 2013 -0800
+++ b/hotspot/src/share/vm/opto/loopnode.cpp Tue Feb 12 12:56:11 2013 +0100
@@ -88,9 +88,9 @@
assert( !n->is_Phi() && !n->is_CFG(), "this code only handles data nodes" );
uint i;
Node *early;
- if( n->in(0) ) {
+ if (n->in(0) && !n->is_expensive()) {
early = n->in(0);
- if( !early->is_CFG() ) // Might be a non-CFG multi-def
+ if (!early->is_CFG()) // Might be a non-CFG multi-def
early = get_ctrl(early); // So treat input as a straight data input
i = 1;
} else {
@@ -99,28 +99,28 @@
}
uint e_d = dom_depth(early);
assert( early, "" );
- for( ; i < n->req(); i++ ) {
+ for (; i < n->req(); i++) {
Node *cin = get_ctrl(n->in(i));
assert( cin, "" );
// Keep deepest dominator depth
uint c_d = dom_depth(cin);
- if( c_d > e_d ) { // Deeper guy?
+ if (c_d > e_d) { // Deeper guy?
early = cin; // Keep deepest found so far
e_d = c_d;
- } else if( c_d == e_d && // Same depth?
- early != cin ) { // If not equal, must use slower algorithm
+ } else if (c_d == e_d && // Same depth?
+ early != cin) { // If not equal, must use slower algorithm
// If same depth but not equal, one _must_ dominate the other
// and we want the deeper (i.e., dominated) guy.
Node *n1 = early;
Node *n2 = cin;
- while( 1 ) {
+ while (1) {
n1 = idom(n1); // Walk up until break cycle
n2 = idom(n2);
- if( n1 == cin || // Walked early up to cin
- dom_depth(n2) < c_d )
+ if (n1 == cin || // Walked early up to cin
+ dom_depth(n2) < c_d)
break; // early is deeper; keep him
- if( n2 == early || // Walked cin up to early
- dom_depth(n1) < c_d ) {
+ if (n2 == early || // Walked cin up to early
+ dom_depth(n1) < c_d) {
early = cin; // cin is deeper; keep him
break;
}
@@ -132,9 +132,108 @@
// Return earliest legal location
assert(early == find_non_split_ctrl(early), "unexpected early control");
+ if (n->is_expensive()) {
+ assert(n->in(0), "should have control input");
+ early = get_early_ctrl_for_expensive(n, early);
+ }
+
return early;
}
+//------------------------------get_early_ctrl_for_expensive---------------------------------
+// Move node up the dominator tree as high as legal while still beneficial; updates n's control input and returns the chosen control
+Node *PhaseIdealLoop::get_early_ctrl_for_expensive(Node *n, Node* earliest) {
+ assert(n->in(0) && n->is_expensive(), "expensive node with control input here");
+ assert(OptimizeExpensiveOps, "optimization off?");
+
+ Node* ctl = n->in(0);
+ assert(ctl->is_CFG(), "expensive input 0 must be cfg");
+ uint min_dom_depth = dom_depth(earliest); // the walk below never goes above this depth
+#ifdef ASSERT
+ if (!is_dominator(ctl, earliest) && !is_dominator(earliest, ctl)) {
+ dump_bad_graph("Bad graph detected in get_early_ctrl_for_expensive", n, earliest, ctl);
+ assert(false, "Bad graph detected in get_early_ctrl_for_expensive");
+ }
+#endif
+ if (dom_depth(ctl) < min_dom_depth) { // current control is already shallower than earliest: use earliest
+ return earliest;
+ }
+
+ while (1) {
+ Node *next = ctl;
+ // Moving the node out of a loop on the projection of an If
+ // confuses loop predication. So once we hit a Loop in an If branch
+ // that doesn't branch to an UNC, we stop. The code that processes
+ // expensive nodes will notice the loop and skip over it to try to
+ // move the node further up.
+ if (ctl->is_CountedLoop() && ctl->in(1) != NULL && ctl->in(1)->in(0) != NULL && ctl->in(1)->in(0)->is_If()) {
+ if (!is_uncommon_trap_if_pattern(ctl->in(1)->as_Proj(), Deoptimization::Reason_none)) {
+ break;
+ }
+ next = idom(ctl->in(1)->in(0));
+ } else if (ctl->is_Proj()) {
+ // We only move it up along a projection if the projection is
+ // the single control projection for its parent: same code path,
+ // if it's an If with UNC or the fall-through of a call.
+ Node* parent_ctl = ctl->in(0);
+ if (parent_ctl == NULL) {
+ break;
+ } else if (parent_ctl->is_CountedLoopEnd() && parent_ctl->as_CountedLoopEnd()->loopnode() != NULL) {
+ next = parent_ctl->as_CountedLoopEnd()->loopnode()->init_control();
+ } else if (parent_ctl->is_If()) {
+ if (!is_uncommon_trap_if_pattern(ctl->as_Proj(), Deoptimization::Reason_none)) {
+ break;
+ }
+ assert(idom(ctl) == parent_ctl, "strange");
+ next = idom(parent_ctl);
+ } else if (ctl->is_CatchProj()) {
+ if (ctl->as_Proj()->_con != CatchProjNode::fall_through_index) { // only the fall-through projection is followed upwards
+ break;
+ }
+ assert(parent_ctl->in(0)->in(0)->is_Call(), "strange graph");
+ next = parent_ctl->in(0)->in(0)->in(0);
+ } else {
+ // Check if parent control has a single projection (this
+ // control is the only possible successor of the parent
+ // control). If so, we can try to move the node above the
+ // parent control.
+ int nb_ctl_proj = 0;
+ for (DUIterator_Fast imax, i = parent_ctl->fast_outs(imax); i < imax; i++) {
+ Node *p = parent_ctl->fast_out(i);
+ if (p->is_Proj() && p->is_CFG()) {
+ nb_ctl_proj++;
+ if (nb_ctl_proj > 1) { // two are enough to know it is not a single successor
+ break;
+ }
+ }
+ }
+
+ if (nb_ctl_proj > 1) {
+ break;
+ }
+ assert(parent_ctl->is_Start() || parent_ctl->is_MemBar() || parent_ctl->is_Call(), "unexpected node");
+ assert(idom(ctl) == parent_ctl, "strange");
+ next = idom(parent_ctl);
+ }
+ } else {
+ next = idom(ctl);
+ }
+ if (next->is_Root() || next->is_Start() || dom_depth(next) < min_dom_depth) { // stop before Root/Start or anything above earliest
+ break;
+ }
+ ctl = next;
+ }
+
+ if (ctl != n->in(0)) { // only touch the node (and its IGVN hash entry) when the control actually changed
+ _igvn.hash_delete(n);
+ n->set_req(0, ctl);
+ _igvn.hash_insert(n);
+ }
+
+ return ctl;
+}
+
+
//------------------------------set_early_ctrl---------------------------------
// Set earliest legal control
void PhaseIdealLoop::set_early_ctrl( Node *n ) {
@@ -1892,6 +1991,98 @@
}
}
+//------------------------process_expensive_nodes-----------------------------
+// Expensive nodes have their control input set to prevent the GVN
+// from commoning them, which would force the resulting node onto a
+// more frequent path. Use CFG information here to change the control
+// inputs so that some expensive nodes can be commoned without being
+// executed more frequently. Returns true if a control input changed.
+bool PhaseIdealLoop::process_expensive_nodes() {
+ assert(OptimizeExpensiveOps, "optimization off?");
+
+ // Sort nodes to bring similar nodes together
+ C->sort_expensive_nodes();
+
+ bool progress = false;
+
+ for (int i = 0; i < C->expensive_count(); ) { // i is advanced inside the body, one group of similar nodes at a time
+ Node* n = C->expensive_node(i);
+ int start = i;
+ // Find nodes similar to n
+ i++;
+ for (; i < C->expensive_count() && Compile::cmp_expensive_nodes(n, C->expensive_node(i)) == 0; i++);
+ int end = i; // [start, end) all compare equal to n
+ // And compare them two by two
+ for (int j = start; j < end; j++) {
+ Node* n1 = C->expensive_node(j);
+ if (is_node_unreachable(n1)) {
+ continue;
+ }
+ for (int k = j+1; k < end; k++) {
+ Node* n2 = C->expensive_node(k);
+ if (is_node_unreachable(n2)) {
+ continue;
+ }
+
+ assert(n1 != n2, "should be pair of nodes");
+
+ Node* c1 = n1->in(0);
+ Node* c2 = n2->in(0);
+
+ Node* parent_c1 = c1;
+ Node* parent_c2 = c2;
+
+ // The call to get_early_ctrl_for_expensive() moves the
+ // expensive nodes up but stops at loops that are in an If
+ // branch. See whether we can exit the loop and move above the
+ // If.
+ if (c1->is_Loop()) {
+ parent_c1 = c1->in(1);
+ }
+ if (c2->is_Loop()) {
+ parent_c2 = c2->in(1);
+ }
+
+ if (parent_c1 == parent_c2) { // same effective control: no move, just queue both nodes for IGVN
+ _igvn._worklist.push(n1);
+ _igvn._worklist.push(n2);
+ continue;
+ }
+
+ // Look for identical expensive node up the dominator chain.
+ if (is_dominator(c1, c2)) {
+ c2 = c1;
+ } else if (is_dominator(c2, c1)) {
+ c1 = c2;
+ } else if (parent_c1->is_Proj() && parent_c1->in(0)->is_If() &&
+ parent_c2->is_Proj() && parent_c1->in(0) == parent_c2->in(0)) {
+ // Both branches have the same expensive node so move it up
+ // before the If.
+ c1 = c2 = idom(parent_c1->in(0));
+ }
+ // Do the actual moves
+ if (n1->in(0) != c1) {
+ _igvn.hash_delete(n1);
+ n1->set_req(0, c1);
+ _igvn.hash_insert(n1);
+ _igvn._worklist.push(n1);
+ progress = true;
+ }
+ if (n2->in(0) != c2) {
+ _igvn.hash_delete(n2);
+ n2->set_req(0, c2);
+ _igvn.hash_insert(n2);
+ _igvn._worklist.push(n2);
+ progress = true;
+ }
+ }
+ }
+ }
+
+ return progress;
+}
+
+
//=============================================================================
//----------------------------build_and_optimize-------------------------------
// Create a PhaseLoop. Build the ideal Loop tree. Map each Ideal Node to
@@ -1960,7 +2151,9 @@
}
// Nothing to do, so get out
- if( !C->has_loops() && !skip_loop_opts && !do_split_ifs && !_verify_me && !_verify_only ) {
+ bool stop_early = !C->has_loops() && !skip_loop_opts && !do_split_ifs && !_verify_me && !_verify_only;
+ bool do_expensive_nodes = C->should_optimize_expensive_nodes(_igvn);
+ if (stop_early && !do_expensive_nodes) {
_igvn.optimize(); // Cleanup NeverBranches
return;
}
@@ -2058,6 +2251,21 @@
return;
}
+ if (stop_early) {
+ assert(do_expensive_nodes, "why are we here?");
+ if (process_expensive_nodes()) {
+ // If we made some progress when processing expensive nodes then
+ // the IGVN may modify the graph in a way that will allow us to
+ // make some more progress: we need to try processing expensive
+ // nodes again.
+ C->set_major_progress();
+ }
+
+ _igvn.optimize();
+
+ return;
+ }
+
// Some parser-inserted loop predicates could never be used by loop
// predication or they were moved away from loop during some optimizations.
// For example, peeling. Eliminate them before next loop optimizations.
@@ -2120,6 +2328,10 @@
NOT_PRODUCT( if( VerifyLoopOptimizations ) verify(); );
}
+ if (!C->major_progress() && do_expensive_nodes && process_expensive_nodes()) {
+ C->set_major_progress();
+ }
+
// Perform loop predication before iteration splitting
if (C->has_loops() && !C->major_progress() && (C->predicate_count() > 0)) {
_ltree_root->_child->loop_predication(this);
@@ -3299,7 +3511,7 @@
#ifdef ASSERT
if (legal->is_Start() && !early->is_Root()) {
// Bad graph. Print idom path and fail.
- dump_bad_graph(n, early, LCA);
+ dump_bad_graph("Bad graph detected in build_loop_late", n, early, LCA);
assert(false, "Bad graph detected in build_loop_late");
}
#endif
@@ -3350,8 +3562,8 @@
}
#ifdef ASSERT
-void PhaseIdealLoop::dump_bad_graph(Node* n, Node* early, Node* LCA) {
- tty->print_cr( "Bad graph detected in build_loop_late");
+void PhaseIdealLoop::dump_bad_graph(const char* msg, Node* n, Node* early, Node* LCA) {
+ tty->print_cr(msg);
tty->print("n: "); n->dump();
tty->print("early(n): "); early->dump();
if (n->in(0) != NULL && !n->in(0)->is_top() &&
--- a/hotspot/src/share/vm/opto/loopnode.hpp Mon Feb 11 14:47:04 2013 -0800
+++ b/hotspot/src/share/vm/opto/loopnode.hpp Tue Feb 12 12:56:11 2013 +0100
@@ -263,9 +263,18 @@
bool stride_is_con() const { Node *tmp = stride (); return (tmp != NULL && tmp->is_Con()); }
BoolTest::mask test_trip() const { return in(TestValue)->as_Bool()->_test._test; }
CountedLoopNode *loopnode() const {
+ // The CountedLoopNode that goes with this CountedLoopEndNode may
+ // have been optimized out by the IGVN so be cautious with the
+ // pattern matching on the graph
+ if (phi() == NULL) {
+ return NULL;
+ }
Node *ln = phi()->in(0);
- assert( ln->Opcode() == Op_CountedLoop, "malformed loop" );
- return (CountedLoopNode*)ln; }
+ if (ln->is_CountedLoop() && ln->as_CountedLoop()->loopexit() == this) {
+ return (CountedLoopNode*)ln;
+ }
+ return NULL;
+ }
#ifndef PRODUCT
virtual void dump_spec(outputStream *st) const;
@@ -598,6 +607,7 @@
// check if transform created new nodes that need _ctrl recorded
Node *get_late_ctrl( Node *n, Node *early );
Node *get_early_ctrl( Node *n );
+ Node *get_early_ctrl_for_expensive(Node *n, Node* earliest);
void set_early_ctrl( Node *n );
void set_subtree_ctrl( Node *root );
void set_ctrl( Node *n, Node *ctrl ) {
@@ -905,6 +915,16 @@
void collect_potentially_useful_predicates(IdealLoopTree *loop, Unique_Node_List &predicate_opaque1);
void eliminate_useless_predicates();
+ // Change the control input of expensive nodes to allow commoning by
+ // IGVN when it is guaranteed to not result in a more frequent
+ // execution of the expensive node. Return true if progress.
+ bool process_expensive_nodes();
+
+ // Check whether node has become unreachable: has_node(n) fails or Node::is_unreachable() holds for it
+ bool is_node_unreachable(Node *n) const {
+ return !has_node(n) || n->is_unreachable(_igvn);
+ }
+
// Eliminate range-checks and other trip-counter vs loop-invariant tests.
void do_range_check( IdealLoopTree *loop, Node_List &old_new );
@@ -1043,7 +1063,7 @@
void register_new_node( Node *n, Node *blk );
#ifdef ASSERT
-void dump_bad_graph(Node* n, Node* early, Node* LCA);
+ void dump_bad_graph(const char* msg, Node* n, Node* early, Node* LCA);
#endif
#ifndef PRODUCT
--- a/hotspot/src/share/vm/opto/node.cpp Mon Feb 11 14:47:04 2013 -0800
+++ b/hotspot/src/share/vm/opto/node.cpp Tue Feb 12 12:56:11 2013 +0100
@@ -493,6 +493,8 @@
}
if (is_macro())
compile->add_macro_node(n);
+ if (is_expensive())
+ compile->add_expensive_node(n);
n->set_idx(compile->next_unique()); // Get new unique index as well
debug_only( n->verify_construction() );
@@ -616,6 +618,9 @@
if (is_macro()) {
compile->remove_macro_node(this);
}
+ if (is_expensive()) {
+ compile->remove_expensive_node(this);
+ }
#ifdef ASSERT
// We will not actually delete the storage, but we'll make the node unusable.
*(address*)this = badAddress; // smash the C++ vtbl, probably
@@ -689,6 +694,13 @@
}
#endif
+
+//------------------------------is_unreachable---------------------------------
+bool Node::is_unreachable(PhaseIterGVN &igvn) const { // True if the node has no uses, has type TOP, or its control input is top
+ assert(!is_Mach(), "doesn't work with MachNodes");
+ return outcnt() == 0 || igvn.type(this) == Type::TOP || (in(0) != NULL && in(0)->is_top()); // in(0) may be NULL (control-less node): test it before dereferencing
+}
+
//------------------------------add_req----------------------------------------
// Add a new required input at the end
void Node::add_req( Node *n ) {
@@ -1246,6 +1258,9 @@
if (dead->is_macro()) {
igvn->C->remove_macro_node(dead);
}
+ if (dead->is_expensive()) {
+ igvn->C->remove_expensive_node(dead);
+ }
// Kill all inputs to the dead guy
for (uint i=0; i < dead->req(); i++) {
Node *n = dead->in(i); // Get input to dead guy
--- a/hotspot/src/share/vm/opto/node.hpp Mon Feb 11 14:47:04 2013 -0800
+++ b/hotspot/src/share/vm/opto/node.hpp Tue Feb 12 12:56:11 2013 +0100
@@ -378,6 +378,8 @@
bool is_dead() const;
#define is_not_dead(n) ((n) == NULL || !VerifyIterativeGVN || !((n)->is_dead()))
#endif
+ // Check whether node has become unreachable
+ bool is_unreachable(PhaseIterGVN &igvn) const;
// Set a required input edge, also updates corresponding output edge
void add_req( Node *n ); // Append a NEW required input
@@ -646,7 +648,8 @@
Flag_may_be_short_branch = Flag_is_dead_loop_safe << 1,
Flag_avoid_back_to_back = Flag_may_be_short_branch << 1,
Flag_has_call = Flag_avoid_back_to_back << 1,
- _max_flags = (Flag_has_call << 1) - 1 // allow flags combination
+ Flag_is_expensive = Flag_has_call << 1,
+ _max_flags = (Flag_is_expensive << 1) - 1 // allow flags combination
};
private:
@@ -819,6 +822,8 @@
// The node is a "macro" node which needs to be expanded before matching
bool is_macro() const { return (_flags & Flag_is_macro) != 0; }
+ // The node is expensive: the best control is set during loop opts
+ bool is_expensive() const { return (_flags & Flag_is_expensive) != 0 && in(0) != NULL; }
//----------------- Optimization
--- a/hotspot/src/share/vm/opto/phaseX.cpp Mon Feb 11 14:47:04 2013 -0800
+++ b/hotspot/src/share/vm/opto/phaseX.cpp Tue Feb 12 12:56:11 2013 +0100
@@ -1203,6 +1203,9 @@
if (dead->is_macro()) {
C->remove_macro_node(dead);
}
+ if (dead->is_expensive()) {
+ C->remove_expensive_node(dead);
+ }
if (recurse) {
continue;
--- a/hotspot/src/share/vm/opto/subnode.hpp Mon Feb 11 14:47:04 2013 -0800
+++ b/hotspot/src/share/vm/opto/subnode.hpp Tue Feb 12 12:56:11 2013 +0100
@@ -456,7 +456,10 @@
// Exponentiate a double
class ExpDNode : public Node {
public:
- ExpDNode( Node *c, Node *in1 ) : Node(c, in1) {}
+ ExpDNode(Compile* C, Node *c, Node *in1) : Node(c, in1) {
+ init_flags(Flag_is_expensive);
+ C->add_expensive_node(this);
+ }
virtual int Opcode() const;
const Type *bottom_type() const { return Type::DOUBLE; }
virtual uint ideal_reg() const { return Op_RegD; }
@@ -489,7 +492,10 @@
// Raise a double to a double power
class PowDNode : public Node {
public:
- PowDNode(Node *c, Node *in1, Node *in2 ) : Node(c, in1, in2) {}
+ PowDNode(Compile* C, Node *c, Node *in1, Node *in2 ) : Node(c, in1, in2) {
+ init_flags(Flag_is_expensive);
+ C->add_expensive_node(this);
+ }
virtual int Opcode() const;
const Type *bottom_type() const { return Type::DOUBLE; }
virtual uint ideal_reg() const { return Op_RegD; }