--- a/src/hotspot/share/opto/loopTransform.cpp Mon Jun 03 10:51:28 2019 +0200
+++ b/src/hotspot/share/opto/loopTransform.cpp Tue May 28 14:56:58 2019 +0200
@@ -286,6 +286,9 @@
Node* n2 = n1->in(3 - inv1_idx);
int inv2_idx = is_invariant_addition(n2, phase);
if (!inv2_idx) return NULL;
+
+ if (!phase->may_require_nodes(10, 10)) return NULL;
+
Node* x = n2->in(3 - inv2_idx);
Node* inv2 = n2->in(inv2_idx);
@@ -337,61 +340,72 @@
Node* nn = reassociate_add_sub(n, phase);
if (nn == NULL) break;
n = nn; // again
- };
+ }
}
}
//------------------------------policy_peeling---------------------------------
-// Return TRUE or FALSE if the loop should be peeled or not. Peel if we can
-// make some loop-invariant test (usually a null-check) happen before the loop.
-bool IdealLoopTree::policy_peeling(PhaseIdealLoop *phase) const {
- IdealLoopTree *loop = (IdealLoopTree*)this;
+// Return TRUE if the loop should be peeled, otherwise return FALSE. Peeling
+// is applicable if we can make a loop-invariant test (usually a null-check)
+// execute before we enter the loop. When TRUE, the estimated node budget is
+// also requested.
+bool IdealLoopTree::policy_peeling(PhaseIdealLoop *phase) {
+ uint estimate = estimate_peeling(phase);
+
+ return estimate == 0 ? false : phase->may_require_nodes(estimate);
+}
+
+// Perform actual policy and size estimate for the loop peeling transform, and
+// return the estimated loop size if peeling is applicable, otherwise return
+// zero. No node budget is allocated.
+uint IdealLoopTree::estimate_peeling(PhaseIdealLoop *phase) {
// If nodes are depleted, some transform has miscalculated its needs.
assert(!phase->exceeding_node_budget(), "sanity");
- uint body_size = loop->_body.size();
- // Peeling does loop cloning which can result in O(N^2) node construction
- if (body_size > 255) {
- return false; // Prevent overflow for large body size
+ // Peeling does loop cloning which can result in O(N^2) node construction.
+ if (_body.size() > 255) {
+ return 0; // Suppress too large body size.
}
- uint estimate = body_size * body_size;
+ // Optimistic estimate that approximates loop body complexity via data and
+ // control flow fan-out (instead of using the more pessimistic: BodySize^2).
+ uint estimate = est_loop_clone_sz(2);
+
if (phase->exceeding_node_budget(estimate)) {
- return false; // Too large to safely clone
+ return 0; // Too large to safely clone.
}
- // check for vectorized loops, any peeling done was already applied
+ // Check for vectorized loops, any peeling done was already applied.
if (_head->is_CountedLoop()) {
CountedLoopNode* cl = _head->as_CountedLoop();
if (cl->is_unroll_only() || cl->trip_count() == 1) {
- return false;
+ return 0;
}
}
- Node* test = loop->tail();
-
- while (test != _head) { // Scan till run off top of loop
- if (test->is_If()) { // Test?
+ Node* test = tail();
+
+ while (test != _head) { // Scan till run off top of loop
+ if (test->is_If()) { // Test?
Node *ctrl = phase->get_ctrl(test->in(1));
if (ctrl->is_top()) {
- return false; // Found dead test on live IF? No peeling!
+ return 0; // Found dead test on live IF? No peeling!
}
- // Standard IF only has one input value to check for loop invariance
+ // Standard IF only has one input value to check for loop invariance.
assert(test->Opcode() == Op_If ||
test->Opcode() == Op_CountedLoopEnd ||
test->Opcode() == Op_RangeCheck,
"Check this code when new subtype is added");
// Condition is not a member of this loop?
if (!is_member(phase->get_loop(ctrl)) && is_loop_exit(test)) {
- // Found reason to peel!
- return phase->may_require_nodes(estimate);
+ return estimate; // Found reason to peel!
}
}
- // Walk up dominators to loop _head looking for test which is
- // executed on every path thru loop.
+ // Walk up dominators to loop _head looking for test which is executed on
+ // every path through the loop.
test = phase->idom(test);
}
- return false;
+ return 0;
}
//------------------------------peeled_dom_test_elim---------------------------
@@ -638,8 +652,8 @@
}
}
-
// Step 4: Correct dom-depth info. Set to loop-head depth.
+
int dd = dom_depth(head);
set_idom(head, head->in(1), dd);
for (uint j3 = 0; j3 < loop->_body.size(); j3++) {
@@ -657,11 +671,30 @@
loop->record_for_igvn();
}
-#define EMPTY_LOOP_SIZE 7 // number of nodes in an empty loop
+// The Estimated Loop Unroll Size: UnrollFactor * (106% * BodySize + BC) + CC,
+// where BC and CC are (totally) ad-hoc/magic "body" and "clone" constants,
+// respectively, used to ensure that node usage estimates made are on the safe
+// side, for the most part. This is a simplified version of the loop clone
+// size calculation in est_loop_clone_sz(), defined for unroll factors larger
+// than one (>1), performing an overflow check and returning 'UINT_MAX' in
+// case of an overflow.
+static uint est_loop_unroll_sz(uint factor, uint size) {
+ precond(0 < factor);
+
+ uint const bc = 5;
+ uint const cc = 7;
+ uint const sz = size + (size + 15) / 16;
+ uint estimate = factor * (sz + bc) + cc;
+
+ return (estimate - cc) / factor == sz + bc ? estimate : UINT_MAX;
+}
+
+#define EMPTY_LOOP_SIZE 7 // Number of nodes in an empty loop.
//------------------------------policy_maximally_unroll------------------------
-// Calculate exact loop trip count and return true if loop can be maximally
-// unrolled.
+// Calculate the exact loop trip-count and return TRUE if loop can be fully,
+// i.e. maximally, unrolled, otherwise return FALSE. When TRUE, the estimated
+// node budget is also requested.
bool IdealLoopTree::policy_maximally_unroll(PhaseIdealLoop *phase) const {
CountedLoopNode *cl = _head->as_CountedLoop();
assert(cl->is_normal_loop(), "");
@@ -693,7 +726,7 @@
// Take into account that after unroll conjoined heads and tails will fold,
// otherwise policy_unroll() may allow more unrolling than max unrolling.
- uint new_body_size = est_loop_clone_sz(trip_count, body_size - EMPTY_LOOP_SIZE);
+ uint new_body_size = est_loop_unroll_sz(trip_count, body_size - EMPTY_LOOP_SIZE);
if (new_body_size == UINT_MAX) { // Check for bad estimate (overflow).
return false;
@@ -742,8 +775,9 @@
//------------------------------policy_unroll----------------------------------
-// Return TRUE or FALSE if the loop should be unrolled or not. Unroll if the
-// loop is a CountedLoop and the body is small enough.
+// Return TRUE or FALSE if the loop should be unrolled or not. Apply unroll if
+// the loop is a counted loop and the loop body is small enough. When TRUE,
+// the estimated node budget is also requested.
bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
CountedLoopNode *cl = _head->as_CountedLoop();
@@ -887,7 +921,7 @@
LoopMaxUnroll = slp_max_unroll_factor;
}
- uint estimate = est_loop_clone_sz(2, body_size);
+ uint estimate = est_loop_clone_sz(2);
if (cl->has_passed_slp()) {
if (slp_max_unroll_factor >= future_unroll_cnt) {
@@ -958,8 +992,10 @@
}
//------------------------------policy_range_check-----------------------------
-// Return TRUE or FALSE if the loop should be range-check-eliminated.
-// Actually we do iteration-splitting, a more powerful form of RCE.
+// Return TRUE or FALSE if the loop should be range-check-eliminated or not.
+// When TRUE, the estimated node budget is also requested.
+//
+// We will actually perform iteration-splitting, a more powerful form of RCE.
bool IdealLoopTree::policy_range_check(PhaseIdealLoop *phase) const {
if (!RangeCheckElimination) return false;
@@ -967,9 +1003,9 @@
assert(!phase->exceeding_node_budget(), "sanity");
CountedLoopNode *cl = _head->as_CountedLoop();
- // If we unrolled with no intention of doing RCE and we later
- // changed our minds, we got no pre-loop. Either we need to
- // make a new pre-loop, or we gotta disallow RCE.
+ // If we unrolled with no intention of doing RCE and we later changed our
+ // minds, we got no pre-loop. Either we need to make a new pre-loop, or we
+ // have to disallow RCE.
if (cl->is_main_no_pre_loop()) return false; // Disallowed for now.
Node *trip_counter = cl->phi();
@@ -1016,13 +1052,13 @@
if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL, NULL)) {
continue;
}
- // Found a test like 'trip+off vs limit'. Test is an IfNode, has two
- // (2) projections. If BOTH are in the loop we need loop unswitching
- // instead of iteration splitting.
+ // Found a test like 'trip+off vs limit'. Test is an IfNode, has two (2)
+ // projections. If BOTH are in the loop we need loop unswitching instead
+ // of iteration splitting.
if (is_loop_exit(iff)) {
// Found valid reason to split iterations (if there is room).
// NOTE: Usually a gross overestimate.
- return phase->may_require_nodes(est_loop_clone_sz(2, _body.size()));
+ return phase->may_require_nodes(est_loop_clone_sz(2));
}
} // End of is IF
}
@@ -1521,9 +1557,6 @@
// only process vectorized main loops
if (!cl->is_vectorized_loop() || !cl->is_main_loop()) return;
- if (!may_require_nodes(est_loop_clone_sz(2, loop->_body.size()))) {
- return;
- }
int slp_max_unroll_factor = cl->slp_max_unroll();
int cur_unroll = cl->unrolled_count();
@@ -1535,6 +1568,10 @@
// we only ever process this one time
if (cl->has_atomic_post_loop()) return;
+ if (!may_require_nodes(loop->est_loop_clone_sz(2))) {
+ return;
+ }
+
#ifndef PRODUCT
if (TraceLoopOpts) {
tty->print("PostVector ");
@@ -3178,9 +3215,6 @@
AutoNodeBudget node_budget(phase);
- bool should_peel = policy_peeling(phase);
- bool should_unswitch = policy_unswitching(phase);
-
// Non-counted loops may be peeled; exactly 1 iteration is peeled.
// This removes loop-invariant tests (usually null checks).
if (!_head->is_CountedLoop()) { // Non-counted loop
@@ -3188,10 +3222,10 @@
// Partial peel succeeded so terminate this round of loop opts
return false;
}
- if (should_peel) { // Should we peel?
+ if (policy_peeling(phase)) { // Should we peel?
if (PrintOpto) { tty->print_cr("should_peel"); }
- phase->do_peeling(this,old_new);
- } else if (should_unswitch) {
+ phase->do_peeling(this, old_new);
+ } else if (policy_unswitching(phase)) {
phase->do_unswitching(this, old_new);
}
return true;
@@ -3209,12 +3243,11 @@
// Before attempting fancy unrolling, RCE or alignment, see if we want
// to completely unroll this loop or do loop unswitching.
if (cl->is_normal_loop()) {
- if (should_unswitch) {
+ if (policy_unswitching(phase)) {
phase->do_unswitching(this, old_new);
return true;
}
- bool should_maximally_unroll = policy_maximally_unroll(phase);
- if (should_maximally_unroll) {
+ if (policy_maximally_unroll(phase)) {
// Here we did some unrolling and peeling. Eventually we will
// completely unroll this loop and it will no longer be a loop.
phase->do_maximally_unroll(this, old_new);
@@ -3222,6 +3255,9 @@
}
}
+ uint est_peeling = estimate_peeling(phase);
+ bool should_peel = 0 < est_peeling;
+
// Counted loops may be peeled, may need some iterations run up
// front for RCE, and may want to align loop refs to a cache
// line. Thus we clone a full loop up front whose trip count is
@@ -3252,14 +3288,15 @@
// peeling.
if (should_rce || should_align || should_unroll) {
if (cl->is_normal_loop()) { // Convert to 'pre/main/post' loops
- if (!phase->may_require_nodes(est_loop_clone_sz(3, _body.size()))) {
+ uint estimate = est_loop_clone_sz(3);
+ if (!phase->may_require_nodes(estimate)) {
return false;
}
- phase->insert_pre_post_loops(this,old_new, !may_rce_align);
+ phase->insert_pre_post_loops(this, old_new, !may_rce_align);
}
- // Adjust the pre- and main-loop limits to let the pre and post loops run
- // with full checks, but the main-loop with no checks. Remove said
- // checks from the main body.
+ // Adjust the pre- and main-loop limits to let the pre and post loops run
+ // with full checks, but the main-loop with no checks. Remove said checks
+ // from the main body.
if (should_rce) {
if (phase->do_range_check(this, old_new) != 0) {
cl->mark_has_range_checks();
@@ -3293,7 +3330,9 @@
}
} else { // Else we have an unchanged counted loop
if (should_peel) { // Might want to peel but do nothing else
- phase->do_peeling(this,old_new);
+ if (phase->may_require_nodes(est_peeling)) {
+ phase->do_peeling(this, old_new);
+ }
}
}
return true;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/compiler/loopopts/LoopUnswitchingBadNodeBudget.java Tue May 28 14:56:58 2019 +0200
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8223502
+ * @summary Node estimate for loop unswitching is not correct:
+ * assert(delta <= 2 * required) failed: Bad node estimate
+ *
+ * @requires !vm.graal.enabled
+ *
+ * @run main/othervm -XX:-TieredCompilation -XX:-BackgroundCompilation
+ * -XX:-UseOnStackReplacement -XX:CompileOnly=LoopUnswitchingBadNodeBudget::test
+ * -XX:CompileCommand=dontinline,LoopUnswitchingBadNodeBudget::helper
+ * -XX:+UnlockExperimentalVMOptions -XX:-UseSwitchProfiling LoopUnswitchingBadNodeBudget
+ *
+ */
+
+public class LoopUnswitchingBadNodeBudget {
+
+ public static void main(String[] args) {
+ for (int i = 0; i < 20_000; i++) {
+ for (int j = 0; j < 100; j++) {
+ test(j, true, 0, 0, 0);
+ test(j, false, 0, 0, 0);
+ }
+ }
+ }
+
+ private static int test(int j, boolean flag, int k, int l, int m) {
+ int res = 0;
+ for (int i = 0; i < 24; i++) {
+ if (flag) {
+ k = k / 2;
+ l = l * 2;
+ m = m + 2;
+ }
+ switch (j) {
+ case 0: break;
+ case 1: return helper(j, k, l, m);
+ case 2: return helper(j, k, l, m);
+ case 3: return helper(j, k, l, m);
+ case 4: return helper(j, k, l, m);
+ case 5: return helper(j, k, l, m);
+ case 6: return helper(j, k, l, m);
+ case 7: return helper(j, k, l, m);
+ case 8: return helper(j, k, l, m);
+ case 9: return helper(j, k, l, m);
+ case 10: return helper(j, k, l, m);
+ case 11: return helper(j, k, l, m);
+ case 12: return helper(j, k, l, m);
+ case 13: return helper(j, k, l, m);
+ case 14: return helper(j, k, l, m);
+ case 15: return helper(j, k, l, m);
+ case 16: return helper(j, k, l, m);
+ case 17: return helper(j, k, l, m);
+ case 18: return helper(j, k, l, m);
+ case 19: return helper(j, k, l, m);
+ case 20: return helper(j, k, l, m);
+ case 21: return helper(j, k, l, m);
+ case 22: return helper(j, k, l, m);
+ case 23: return helper(j, k, l, m);
+ case 24: return helper(j, k, l, m);
+ case 25: return helper(j, k, l, m);
+ case 26: return helper(j, k, l, m);
+ case 27: return helper(j, k, l, m);
+ case 28: return helper(j, k, l, m);
+ case 29: return helper(j, k, l, m);
+ case 30: return helper(j, k, l, m);
+ case 31: return helper(j, k, l, m);
+ case 32: return helper(j, k, l, m);
+ case 33: return helper(j, k, l, m);
+ case 34: return helper(j, k, l, m);
+ case 35: return helper(j, k, l, m);
+ case 36: return helper(j, k, l, m);
+ case 37: return helper(j, k, l, m);
+ case 38: return helper(j, k, l, m);
+ case 39: return helper(j, k, l, m);
+ case 40: return helper(j, k, l, m);
+ case 41: return helper(j, k, l, m);
+ case 42: return helper(j, k, l, m);
+ case 43: return helper(j, k, l, m);
+ case 44: return helper(j, k, l, m);
+ case 45: return helper(j, k, l, m);
+ case 46: return helper(j, k, l, m);
+ case 47: return helper(j, k, l, m);
+ case 48: return helper(j, k, l, m);
+ case 49: return helper(j, k, l, m);
+ case 50: return helper(j, k, l, m);
+ case 51: return helper(j, k, l, m);
+ case 52: return helper(j, k, l, m);
+ case 53: return helper(j, k, l, m);
+ case 54: return helper(j, k, l, m);
+ case 55: return helper(j, k, l, m);
+ case 56: return helper(j, k, l, m);
+ case 57: return helper(j, k, l, m);
+ case 58: return helper(j, k, l, m);
+ case 59: return helper(j, k, l, m);
+ case 60: return helper(j, k, l, m);
+ case 61: return helper(j, k, l, m);
+ case 62: return helper(j, k, l, m);
+ case 63: return helper(j, k, l, m);
+ case 64: return helper(j, k, l, m);
+ case 65: return helper(j, k, l, m);
+ case 66: return helper(j, k, l, m);
+ case 67: return helper(j, k, l, m);
+ case 68: return helper(j, k, l, m);
+ case 69: return helper(j, k, l, m);
+ case 70: return helper(j, k, l, m);
+ case 71: return helper(j, k, l, m);
+ case 72: return helper(j, k, l, m);
+ case 73: return helper(j, k, l, m);
+ case 74: return helper(j, k, l, m);
+ case 75: return helper(j, k, l, m);
+ case 76: return helper(j, k, l, m);
+ case 77: return helper(j, k, l, m);
+ case 78: return helper(j, k, l, m);
+ case 79: return helper(j, k, l, m);
+ case 80: return helper(j, k, l, m);
+ case 81: return helper(j, k, l, m);
+ case 82: return helper(j, k, l, m);
+ case 83: return helper(j, k, l, m);
+ case 84: return helper(j, k, l, m);
+ case 85: return helper(j, k, l, m);
+ case 86: return helper(j, k, l, m);
+ case 87: return helper(j, k, l, m);
+ case 88: return helper(j, k, l, m);
+ case 89: return helper(j, k, l, m);
+ case 90: return helper(j, k, l, m);
+ case 91: return helper(j, k, l, m);
+ case 92: return helper(j, k, l, m);
+ case 93: return helper(j, k, l, m);
+ case 94: return helper(j, k, l, m);
+ case 95: return helper(j, k, l, m);
+ case 96: return helper(j, k, l, m);
+ case 97: return helper(j, k, l, m);
+ case 98: return helper(j, k, l, m);
+ case 99: return helper(j, k, l, m);
+ }
+ res += helper(j, k, l, m);
+ }
+ return res;
+ }
+
+ private static int helper(int j, int k, int l, int m) {
+ return j + k;
+ }
+}