8159016: Over-unrolled loop is partially removed
Summary: Prevent over-unrolling of loops by computing upper bound for trip count.
Reviewed-by: kvn
--- a/hotspot/src/share/vm/opto/loopTransform.cpp Fri Jun 24 15:30:50 2016 -0700
+++ b/hotspot/src/share/vm/opto/loopTransform.cpp Mon Jun 27 10:10:11 2016 +0200
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -70,9 +70,9 @@
}
//------------------------------compute_exact_trip_count-----------------------
-// Compute loop exact trip count if possible. Do not recalculate trip count for
+// Compute loop trip count if possible. Do not recalculate trip count for
// split loops (pre-main-post) which have their limits and inits behind Opaque node.
-void IdealLoopTree::compute_exact_trip_count( PhaseIdealLoop *phase ) {
+void IdealLoopTree::compute_trip_count(PhaseIdealLoop* phase) {
if (!_head->as_Loop()->is_valid_counted_loop()) {
return;
}
@@ -94,17 +94,21 @@
Node* init_n = cl->init_trip();
Node* limit_n = cl->limit();
- if (init_n != NULL && init_n->is_Con() &&
- limit_n != NULL && limit_n->is_Con()) {
+ if (init_n != NULL && limit_n != NULL) {
// Use longs to avoid integer overflow.
- int stride_con = cl->stride_con();
- jlong init_con = cl->init_trip()->get_int();
- jlong limit_con = cl->limit()->get_int();
- int stride_m = stride_con - (stride_con > 0 ? 1 : -1);
+ int stride_con = cl->stride_con();
+ jlong init_con = phase->_igvn.type(init_n)->is_int()->_lo;
+ jlong limit_con = phase->_igvn.type(limit_n)->is_int()->_hi;
+ int stride_m = stride_con - (stride_con > 0 ? 1 : -1);
jlong trip_count = (limit_con - init_con + stride_m)/stride_con;
if (trip_count > 0 && (julong)trip_count < (julong)max_juint) {
- // Set exact trip count.
- cl->set_exact_trip_count((uint)trip_count);
+ if (init_n->is_Con() && limit_n->is_Con()) {
+ // Set exact trip count.
+ cl->set_exact_trip_count((uint)trip_count);
+ } else if (cl->unrolled_count() == 1) {
+ // Set maximum trip count before unrolling.
+ cl->set_trip_count((uint)trip_count);
+ }
}
}
}
@@ -1305,7 +1309,7 @@
assert(main_exit->Opcode() == Op_IfFalse, "");
int dd_main_exit = dom_depth(main_exit);
- // Step A1: Clone the loop body of main. The clone becomes the vector post-loop.
+ // Step A1: Clone the loop body of main. The clone becomes the post-loop.
// The main loop pre-header illegally has 2 control users (old & new loops).
clone_loop(loop, old_new, dd_main_exit);
assert(old_new[main_end->_idx]->Opcode() == Op_CountedLoopEnd, "");
@@ -2095,8 +2099,7 @@
// the loop is in canonical form to multiversion.
closed_range_checks = 0;
- // Check loop body for tests of trip-counter plus loop-invariant vs
- // loop-invariant.
+ // Check loop body for tests of trip-counter plus loop-invariant vs loop-variant.
for( uint i = 0; i < loop->_body.size(); i++ ) {
Node *iff = loop->_body[i];
if (iff->Opcode() == Op_If ||
@@ -2298,7 +2301,7 @@
// skip this loop if it is already checked
if (cl->has_been_range_checked()) return;
- // Now check for existance of range checks
+ // Now check for existence of range checks
for (uint i = 0; i < loop->_body.size(); i++) {
Node *iff = loop->_body[i];
int iff_opc = iff->Opcode();
@@ -2319,7 +2322,7 @@
CountedLoopNode *legacy_cl = legacy_loop->_head->as_CountedLoop();
assert(legacy_cl->is_post_loop(), "");
- // Check for existance of range checks using the unique instance to make a guard with
+ // Check for existence of range checks using the unique instance to make a guard with
Unique_Node_List worklist;
for (uint i = 0; i < legacy_loop->_body.size(); i++) {
Node *iff = legacy_loop->_body[i];
@@ -2422,7 +2425,7 @@
}
//-------------------------poison_rce_post_loop--------------------------------
-// Causes the rce'd post loop to be optimized away if multiverioning fails
+// Causes the rce'd post loop to be optimized away if multiversioning fails
void PhaseIdealLoop::poison_rce_post_loop(IdealLoopTree *rce_loop) {
CountedLoopNode *rce_cl = rce_loop->_head->as_CountedLoop();
Node* ctrl = rce_cl->in(LoopNode::EntryControl);
@@ -2710,8 +2713,8 @@
//=============================================================================
//------------------------------iteration_split_impl---------------------------
bool IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new ) {
- // Compute exact loop trip count if possible.
- compute_exact_trip_count(phase);
+ // Compute loop trip count if possible.
+ compute_trip_count(phase);
// Convert one iteration loop into normal code.
if (policy_do_one_iteration_loop(phase))
--- a/hotspot/src/share/vm/opto/loopnode.hpp Fri Jun 24 15:30:50 2016 -0700
+++ b/hotspot/src/share/vm/opto/loopnode.hpp Mon Jun 27 10:10:11 2016 +0200
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -520,8 +520,8 @@
// Return TRUE if "iff" is a range check.
bool is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invariance& invar) const;
- // Compute loop exact trip count if possible
- void compute_exact_trip_count( PhaseIdealLoop *phase );
+ // Compute loop trip count if possible
+ void compute_trip_count(PhaseIdealLoop* phase);
// Compute loop trip count from profile data
void compute_profile_trip_cnt( PhaseIdealLoop *phase );
--- a/hotspot/src/share/vm/opto/macro.cpp Fri Jun 24 15:30:50 2016 -0700
+++ b/hotspot/src/share/vm/opto/macro.cpp Mon Jun 27 10:10:11 2016 +0200
@@ -1596,8 +1596,12 @@
// All nodes that depended on the InitializeNode for control
// and memory must now depend on the MemBarNode that itself
// depends on the InitializeNode
- _igvn.replace_node(init_ctrl, ctrl);
- _igvn.replace_node(init_mem, mem);
+ if (init_ctrl != NULL) {
+ _igvn.replace_node(init_ctrl, ctrl);
+ }
+ if (init_mem != NULL) {
+ _igvn.replace_node(init_mem, mem);
+ }
}
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/loopopts/TestOverunrolling.java Mon Jun 27 10:10:11 2016 +0200
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8159016
+ * @requires vm.gc == "Parallel" | vm.gc == "null"
+ * @summary Tests correct dominator information after over-unrolling a loop.
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -Xcomp -XX:-TieredCompilation
+ * -XX:-UseG1GC -XX:+UseParallelGC TestOverunrolling
+ */
+public class TestOverunrolling {
+
+ public static Object test(int arg) {
+ Object arr[] = new Object[3];
+ int lim = (arg & 3);
+ // The pre loop is executed for one iteration, initializing p[0].
+ // The main loop is unrolled twice, initializing p[1], p[2], p[3] and p[4].
+ // The p[3] and p[4] stores are always out of bounds and removed. However,
+ // C2 is unable to remove the "over-unrolled", dead main loop. As a result,
+ // there is a control path from the main loop to the post loop without a
+ // memory path (because the last store was replaced by TOP). We crash
+ // because we use a memory edge from a non-dominating region.
+ for (int i = 0; i < lim; ++i) {
+ arr[i] = new Object();
+ }
+ // Avoid EA
+ return arr;
+ }
+
+ public static void main(String args[]) {
+ for (int i = 0; i < 42; ++i) {
+ test(i);
+ }
+ }
+}
+