# HG changeset patch # User zmajo # Date 1439972044 -7200 # Node ID 6346c7568a0363d8e3810f73185d8acc8908879f # Parent 01e2f5e916c7f7e3199f75f4f244f176e5bee3ab# Parent b82e88dcb26cac9f9cbe66c3400171005cabaa84 Merge diff -r 01e2f5e916c7 -r 6346c7568a03 hotspot/src/share/vm/opto/loopnode.cpp --- a/hotspot/src/share/vm/opto/loopnode.cpp Wed Aug 19 08:55:18 2015 +0200 +++ b/hotspot/src/share/vm/opto/loopnode.cpp Wed Aug 19 10:14:04 2015 +0200 @@ -1175,7 +1175,7 @@ //============================================================================= //------------------------------is_member-------------------------------------- // Is 'l' a member of 'this'? -int IdealLoopTree::is_member( const IdealLoopTree *l ) const { +bool IdealLoopTree::is_member(const IdealLoopTree *l) const { while( l->_nest > _nest ) l = l->_parent; return l == this; } diff -r 01e2f5e916c7 -r 6346c7568a03 hotspot/src/share/vm/opto/loopnode.hpp --- a/hotspot/src/share/vm/opto/loopnode.hpp Wed Aug 19 08:55:18 2015 +0200 +++ b/hotspot/src/share/vm/opto/loopnode.hpp Wed Aug 19 10:14:04 2015 +0200 @@ -384,7 +384,7 @@ { } // Is 'l' a member of 'this'? - int is_member( const IdealLoopTree *l ) const; // Test for nested membership + bool is_member(const IdealLoopTree *l) const; // Test for nested membership // Set loop nesting depth. Accumulate has_call bits. int set_nest( uint depth ); @@ -1086,6 +1086,8 @@ bool split_up( Node *n, Node *blk1, Node *blk2 ); void sink_use( Node *use, Node *post_loop ); Node *place_near_use( Node *useblock ) const; + Node* try_move_store_before_loop(Node* n, Node *n_ctrl); + void try_move_store_after_loop(Node* n); bool _created_loop_node; public: diff -r 01e2f5e916c7 -r 6346c7568a03 hotspot/src/share/vm/opto/loopopts.cpp --- a/hotspot/src/share/vm/opto/loopopts.cpp Wed Aug 19 08:55:18 2015 +0200 +++ b/hotspot/src/share/vm/opto/loopopts.cpp Wed Aug 19 10:14:04 2015 +0200 @@ -653,6 +653,209 @@ return iff->in(1); } +#ifdef ASSERT +static void enqueue_cfg_uses(Node* m, Unique_Node_List& wq) { + for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax; i++) { + Node* u = m->fast_out(i); + if (u->is_CFG()) { + if (u->Opcode() == Op_NeverBranch) { + u = ((NeverBranchNode*)u)->proj_out(0); + enqueue_cfg_uses(u, wq); + } else { + wq.push(u); + } + } + } +} +#endif + +// Try moving a store out of a loop, right before the loop +Node* PhaseIdealLoop::try_move_store_before_loop(Node* n, Node *n_ctrl) { + // Store has to be first in the loop body + IdealLoopTree *n_loop = get_loop(n_ctrl); + if (n->is_Store() && n_loop != _ltree_root && n_loop->is_loop()) { + assert(n->in(0), "store should have control set"); + Node* address = n->in(MemNode::Address); + Node* value = n->in(MemNode::ValueIn); + Node* mem = n->in(MemNode::Memory); + IdealLoopTree* address_loop = get_loop(get_ctrl(address)); + IdealLoopTree* value_loop = get_loop(get_ctrl(value)); + + // - address and value must be loop invariant + // - memory must be a memory Phi for the loop + // - Store must be the only store on this memory slice in the + // loop: if there's another store following this one then value + // written at iteration i by the second store could be overwritten + // at iteration i+n by the first store: it's not safe to move the + // first store out of the loop + // - nothing must observe the Phi memory: it guarantees no read + // before the store and no early exit out of the loop + // With those conditions, we are also guaranteed the store post + // dominates the loop head. Otherwise there would be extra Phi + // involved between the loop's Phi and the store. + + if (!n_loop->is_member(address_loop) && + !n_loop->is_member(value_loop) && + mem->is_Phi() && mem->in(0) == n_loop->_head && + mem->outcnt() == 1 && + mem->in(LoopNode::LoopBackControl) == n) { + +#ifdef ASSERT + // Verify that store's control does post dominate loop entry and + // that there's no early exit of the loop before the store. + bool ctrl_ok = false; + { + // Follow control from loop head until n, we exit the loop or + // we reach the tail + ResourceMark rm; + Unique_Node_List wq; + wq.push(n_loop->_head); + assert(n_loop->_tail != NULL, "need a tail"); + for (uint next = 0; next < wq.size(); ++next) { + Node *m = wq.at(next); + if (m == n->in(0)) { + ctrl_ok = true; + continue; + } + assert(!has_ctrl(m), "should be CFG"); + if (!n_loop->is_member(get_loop(m)) || m == n_loop->_tail) { + ctrl_ok = false; + break; + } + enqueue_cfg_uses(m, wq); + } + } + assert(ctrl_ok, "bad control"); +#endif + + // move the Store + _igvn.replace_input_of(mem, LoopNode::LoopBackControl, mem); + _igvn.replace_input_of(n, 0, n_loop->_head->in(LoopNode::EntryControl)); + _igvn.replace_input_of(n, MemNode::Memory, mem->in(LoopNode::EntryControl)); + // Disconnect the phi now. An empty phi can confuse other + // optimizations in this pass of loop opts. + _igvn.replace_node(mem, mem->in(LoopNode::EntryControl)); + n_loop->_body.yank(mem); + + IdealLoopTree* new_loop = get_loop(n->in(0)); + set_ctrl_and_loop(n, n->in(0)); + + return n; + } + } + return NULL; +} + +// Try moving a store out of a loop, right after the loop +void PhaseIdealLoop::try_move_store_after_loop(Node* n) { + if (n->is_Store()) { + assert(n->in(0), "store should have control set"); + Node *n_ctrl = get_ctrl(n); + IdealLoopTree *n_loop = get_loop(n_ctrl); + // Store must be in a loop + if (n_loop != _ltree_root && !n_loop->_irreducible) { + Node* address = n->in(MemNode::Address); + Node* value = n->in(MemNode::ValueIn); + IdealLoopTree* address_loop = get_loop(get_ctrl(address)); + // address must be loop invariant + if (!n_loop->is_member(address_loop)) { + // Store must be last on this memory slice in the loop and + // nothing in the loop must observe it + Node* phi = NULL; + for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { + Node* u = n->fast_out(i); + if (has_ctrl(u)) { // control use? + IdealLoopTree *u_loop = get_loop(get_ctrl(u)); + if (!n_loop->is_member(u_loop)) { + continue; + } + if (u->is_Phi() && u->in(0) == n_loop->_head) { + assert(_igvn.type(u) == Type::MEMORY, "bad phi"); + assert(phi == NULL, "already found"); + phi = u; + continue; + } + } + phi = NULL; + break; + } + if (phi != NULL) { + // Nothing in the loop before the store (next iteration) + // must observe the stored value + bool mem_ok = true; + { + ResourceMark rm; + Unique_Node_List wq; + wq.push(phi); + for (uint next = 0; next < wq.size() && mem_ok; ++next) { + Node *m = wq.at(next); + for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax && mem_ok; i++) { + Node* u = m->fast_out(i); + if (u->is_Store() || u->is_Phi()) { + if (u != n) { + wq.push(u); + mem_ok = (wq.size() <= 10); + } + } else { + mem_ok = false; + break; + } + } + } + } + if (mem_ok) { + // Move the Store out of the loop creating clones along + // all paths out of the loop that observe the stored value + _igvn.rehash_node_delayed(phi); + int count = phi->replace_edge(n, n->in(MemNode::Memory)); + assert(count > 0, "inconsistent phi"); + for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { + Node* u = n->fast_out(i); + Node* c = get_ctrl(u); + + if (u->is_Phi()) { + c = u->in(0)->in(u->find_edge(n)); + } + IdealLoopTree *u_loop = get_loop(c); + assert (!n_loop->is_member(u_loop), "only the phi should have been a use in the loop"); + while(true) { + Node* next_c = find_non_split_ctrl(idom(c)); + if (n_loop->is_member(get_loop(next_c))) { + break; + } + c = next_c; + } + + Node* st = n->clone(); + st->set_req(0, c); + _igvn.register_new_node_with_optimizer(st); + + set_ctrl(st, c); + IdealLoopTree* new_loop = get_loop(c); + assert(new_loop != n_loop, "should be moved out of loop"); + if (new_loop->_child == NULL) new_loop->_body.push(st); + + _igvn.replace_input_of(u, u->find_edge(n), st); + --imax; + --i; + } + + + assert(n->outcnt() == 0, "all uses should be gone"); + _igvn.replace_input_of(n, MemNode::Memory, C->top()); + // Disconnect the phi now. An empty phi can confuse other + // optimizations in this pass of loop opts.. + if (phi->in(LoopNode::LoopBackControl) == phi) { + _igvn.replace_node(phi, phi->in(LoopNode::EntryControl)); + n_loop->_body.yank(phi); + } + } + } + } + } + } +} + //------------------------------split_if_with_blocks_pre----------------------- // Do the real work in a non-recursive function. Data nodes want to be // cloned in the pre-order so they can feed each other nicely. @@ -683,6 +886,11 @@ Node *n_ctrl = get_ctrl(n); if( !n_ctrl ) return n; // Dead node + Node* res = try_move_store_before_loop(n, n_ctrl); + if (res != NULL) { + return n; + } + // Attempt to remix address expressions for loop invariants Node *m = remix_address_expressions( n ); if( m ) return m; @@ -691,16 +899,18 @@ // Returns the block to clone thru. Node *n_blk = has_local_phi_input( n ); if( !n_blk ) return n; + // Do not clone the trip counter through on a CountedLoop // (messes up the canonical shape). if( n_blk->is_CountedLoop() && n->Opcode() == Op_AddI ) return n; // Check for having no control input; not pinned. Allow // dominating control. - if( n->in(0) ) { + if (n->in(0)) { Node *dom = idom(n_blk); - if( dom_lca( n->in(0), dom ) != n->in(0) ) + if (dom_lca(n->in(0), dom) != n->in(0)) { return n; + } } // Policy: when is it profitable. You must get more wins than // policy before it is considered profitable. Policy is usually 0, @@ -1029,6 +1239,8 @@ } } + try_move_store_after_loop(n); + // Check for Opaque2's who's loop has disappeared - who's input is in the // same loop nest as their output. Remove 'em, they are no longer useful. if( n_op == Op_Opaque2 && diff -r 01e2f5e916c7 -r 6346c7568a03 hotspot/src/share/vm/opto/memnode.cpp --- a/hotspot/src/share/vm/opto/memnode.cpp Wed Aug 19 08:55:18 2015 +0200 +++ b/hotspot/src/share/vm/opto/memnode.cpp Wed Aug 19 10:14:04 2015 +0200 @@ -2371,40 +2371,47 @@ Node* mem = in(MemNode::Memory); Node* address = in(MemNode::Address); - // Back-to-back stores to same address? Fold em up. Generally // unsafe if I have intervening uses... Also disallowed for StoreCM // since they must follow each StoreP operation. Redundant StoreCMs // are eliminated just before matching in final_graph_reshape. - if (mem->is_Store() && mem->in(MemNode::Address)->eqv_uncast(address) && - mem->Opcode() != Op_StoreCM) { - // Looking at a dead closed cycle of memory? - assert(mem != mem->in(MemNode::Memory), "dead loop in StoreNode::Ideal"); - - assert(Opcode() == mem->Opcode() || - phase->C->get_alias_index(adr_type()) == Compile::AliasIdxRaw, - "no mismatched stores, except on raw memory"); - - if (mem->outcnt() == 1 && // check for intervening uses - mem->as_Store()->memory_size() <= this->memory_size()) { - // If anybody other than 'this' uses 'mem', we cannot fold 'mem' away. - // For example, 'mem' might be the final state at a conditional return. - // Or, 'mem' might be used by some node which is live at the same time - // 'this' is live, which might be unschedulable. So, require exactly - // ONE user, the 'this' store, until such time as we clone 'mem' for - // each of 'mem's uses (thus making the exactly-1-user-rule hold true). - if (can_reshape) { // (%%% is this an anachronism?) - set_req_X(MemNode::Memory, mem->in(MemNode::Memory), - phase->is_IterGVN()); - } else { - // It's OK to do this in the parser, since DU info is always accurate, - // and the parser always refers to nodes via SafePointNode maps. - set_req(MemNode::Memory, mem->in(MemNode::Memory)); + { + Node* st = mem; + // If Store 'st' has more than one use, we cannot fold 'st' away. + // For example, 'st' might be the final state at a conditional + // return. Or, 'st' might be used by some node which is live at + // the same time 'st' is live, which might be unschedulable. So, + // require exactly ONE user until such time as we clone 'mem' for + // each of 'mem's uses (thus making the exactly-1-user-rule hold + // true). + while (st->is_Store() && st->outcnt() == 1 && st->Opcode() != Op_StoreCM) { + // Looking at a dead closed cycle of memory? + assert(st != st->in(MemNode::Memory), "dead loop in StoreNode::Ideal"); + assert(Opcode() == st->Opcode() || + st->Opcode() == Op_StoreVector || + Opcode() == Op_StoreVector || + phase->C->get_alias_index(adr_type()) == Compile::AliasIdxRaw || + (Opcode() == Op_StoreL && st->Opcode() == Op_StoreI), // expanded ClearArrayNode + err_msg_res("no mismatched stores, except on raw memory: %s %s", NodeClassNames[Opcode()], NodeClassNames[st->Opcode()])); + + if (st->in(MemNode::Address)->eqv_uncast(address) && + st->as_Store()->memory_size() <= this->memory_size()) { + Node* use = st->raw_out(0); + phase->igvn_rehash_node_delayed(use); + if (can_reshape) { + use->set_req_X(MemNode::Memory, st->in(MemNode::Memory), phase->is_IterGVN()); + } else { + // It's OK to do this in the parser, since DU info is always accurate, + // and the parser always refers to nodes via SafePointNode maps. + use->set_req(MemNode::Memory, st->in(MemNode::Memory)); + } + return this; } - return this; + st = st->in(MemNode::Memory); } } + // Capture an unaliased, unconditional, simple store into an initializer. // Or, if it is independent of the allocation, hoist it above the allocation. if (ReduceFieldZeroing && /*can_reshape &&*/ diff -r 01e2f5e916c7 -r 6346c7568a03 hotspot/test/compiler/loopopts/TestMoveStoresOutOfLoops.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/test/compiler/loopopts/TestMoveStoresOutOfLoops.java Wed Aug 19 10:14:04 2015 +0200 @@ -0,0 +1,310 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 8080289 + * @summary Sink stores out of loops if possible + * @run main/othervm -XX:-UseOnStackReplacement -XX:-BackgroundCompilation -XX:+PrintCompilation -XX:CompileCommand=dontinline,TestMoveStoresOutOfLoops::test* TestMoveStoresOutOfLoops + * + */ + +import java.lang.reflect.*; +import java.util.*; +import java.util.function.*; + +public class TestMoveStoresOutOfLoops { + + private static long[] array = new long[10]; + private static long[] array2 = new long[10]; + private static boolean[] array3 = new boolean[1000]; + private static byte[] byte_array = new byte[10]; + + // Array store should be moved out of the loop, value stored + // should be 999, the loop should be eliminated + static void test_after_1(int idx) { + for (int i = 0; i < 1000; i++) { + array[idx] = i; + } + } + + // Array store can't be moved out of loop because of following + // non loop invariant array access + static void test_after_2(int idx) { + for (int i = 0; i < 1000; i++) { + array[idx] = i; + array2[i%10] = i; + } + } + + // Array store can't be moved out of loop because of following + // use + static void test_after_3(int idx) { + for (int i = 0; i < 1000; i++) { + array[idx] = i; + if (array[0] == -1) { + break; + } + } + } + + // Array store can't be moved out of loop because of preceding + // use + static void test_after_4(int idx) { + for (int i = 0; i < 1000; i++) { + if (array[0] == -2) { + break; + } + array[idx] = i; + } + } + + // All array stores should be moved out of the loop, one after + // the other + static void test_after_5(int idx) { + for (int i = 0; i < 1000; i++) { + array[idx] = i; + array[idx+1] = i; + array[idx+2] = i; + array[idx+3] = i; + array[idx+4] = i; + array[idx+5] = i; + } + } + + // Array store can be moved after the loop but needs to be + // cloned on both exit paths + static void test_after_6(int idx) { + for (int i = 0; i < 1000; i++) { + array[idx] = i; + if (array3[i]) { + return; + } + } + } + + // Optimize out redundant stores + static void test_stores_1(int ignored) { + array[0] = 0; + array[1] = 1; + array[2] = 2; + array[0] = 0; + array[1] = 1; + array[2] = 2; + } + + static void test_stores_2(int idx) { + array[idx+0] = 0; + array[idx+1] = 1; + array[idx+2] = 2; + array[idx+0] = 0; + array[idx+1] = 1; + array[idx+2] = 2; + } + + static void test_stores_3(int idx) { + byte_array[idx+0] = 0; + byte_array[idx+1] = 1; + byte_array[idx+2] = 2; + byte_array[idx+0] = 0; + byte_array[idx+1] = 1; + byte_array[idx+2] = 2; + } + + // Array store can be moved out of the loop before the loop header + static void test_before_1(int idx) { + for (int i = 0; i < 1000; i++) { + array[idx] = 999; + } + } + + // Array store can't be moved out of the loop before the loop + // header because there's more than one store on this slice + static void test_before_2(int idx) { + for (int i = 0; i < 1000; i++) { + array[idx] = 999; + array[i%2] = 0; + } + } + + // Array store can't be moved out of the loop before the loop + // header because of use before store + static int test_before_3(int idx) { + int res = 0; + for (int i = 0; i < 1000; i++) { + res += array[i%10]; + array[idx] = 999; + } + return res; + } + + // Array store can't be moved out of the loop before the loop + // header because of possible early exit + static void test_before_4(int idx) { + for (int i = 0; i < 1000; i++) { + if (idx / (i+1) > 0) { + return; + } + array[idx] = 999; + } + } + + // Array store can't be moved out of the loop before the loop + // header because it doesn't postdominate the loop head + static void test_before_5(int idx) { + for (int i = 0; i < 1000; i++) { + if (i % 2 == 0) { + array[idx] = 999; + } + } + } + + // Array store can be moved out of the loop before the loop header + static int test_before_6(int idx) { + int res = 0; + for (int i = 0; i < 1000; i++) { + if (i%2 == 1) { + res *= 2; + } else { + res++; + } + array[idx] = 999; + } + return res; + } + + final HashMap tests = new HashMap<>(); + { + for (Method m : this.getClass().getDeclaredMethods()) { + if (m.getName().matches("test_(before|after|stores)_[0-9]+")) { + assert(Modifier.isStatic(m.getModifiers())) : m; + tests.put(m.getName(), m); + } + } + } + + boolean success = true; + void doTest(String name, Runnable init, Function check) throws Exception { + Method m = tests.get(name); + for (int i = 0; i < 20000; i++) { + init.run(); + m.invoke(null, 0); + success = success && check.apply(name); + if (!success) { + break; + } + } + } + + static void array_init() { + array[0] = -1; + } + + static boolean array_check(String name) { + boolean success = true; + if (array[0] != 999) { + success = false; + System.out.println(name + " failed: array[0] = " + array[0]); + } + return success; + } + + static void array_init2() { + for (int i = 0; i < 6; i++) { + array[i] = -1; + } + } + + static boolean array_check2(String name) { + boolean success = true; + for (int i = 0; i < 6; i++) { + if (array[i] != 999) { + success = false; + System.out.println(name + " failed: array[" + i + "] = " + array[i]); + } + } + return success; + } + + static void array_init3() { + for (int i = 0; i < 3; i++) { + array[i] = -1; + } + } + + static boolean array_check3(String name) { + boolean success = true; + for (int i = 0; i < 3; i++) { + if (array[i] != i) { + success = false; + System.out.println(name + " failed: array[" + i + "] = " + array[i]); + } + } + return success; + } + + static void array_init4() { + for (int i = 0; i < 3; i++) { + byte_array[i] = -1; + } + } + + static boolean array_check4(String name) { + boolean success = true; + for (int i = 0; i < 3; i++) { + if (byte_array[i] != i) { + success = false; + System.out.println(name + " failed: byte_array[" + i + "] = " + byte_array[i]); + } + } + return success; + } + + static public void main(String[] args) throws Exception { + TestMoveStoresOutOfLoops test = new TestMoveStoresOutOfLoops(); + test.doTest("test_after_1", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check); + test.doTest("test_after_2", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check); + test.doTest("test_after_3", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check); + test.doTest("test_after_4", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check); + test.doTest("test_after_5", TestMoveStoresOutOfLoops::array_init2, TestMoveStoresOutOfLoops::array_check2); + test.doTest("test_after_6", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check); + array3[999] = true; + test.doTest("test_after_6", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check); + + test.doTest("test_stores_1", TestMoveStoresOutOfLoops::array_init3, TestMoveStoresOutOfLoops::array_check3); + test.doTest("test_stores_2", TestMoveStoresOutOfLoops::array_init3, TestMoveStoresOutOfLoops::array_check3); + test.doTest("test_stores_3", TestMoveStoresOutOfLoops::array_init4, TestMoveStoresOutOfLoops::array_check4); + + test.doTest("test_before_1", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check); + test.doTest("test_before_2", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check); + test.doTest("test_before_3", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check); + test.doTest("test_before_4", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check); + test.doTest("test_before_5", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check); + test.doTest("test_before_6", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check); + + if (!test.success) { + throw new RuntimeException("Some tests failed"); + } + } +}