6636138: UseSuperWord enabled failure
authorcfang
Tue, 24 Mar 2009 12:19:47 -0700
changeset 2334 742b05aeec42
parent 2333 79fb1ed554bd
child 2335 e1965d139bb7
child 2336 427ce3320578
6636138: UseSuperWord enabled failure Summary: Fixed SuperWord scheduling of memory operations. Reviewed-by: kvn, never
hotspot/src/share/vm/opto/superword.cpp
hotspot/src/share/vm/opto/superword.hpp
hotspot/test/compiler/6636138/Test1.java
hotspot/test/compiler/6636138/Test2.java
--- a/hotspot/src/share/vm/opto/superword.cpp	Mon Mar 23 13:58:58 2009 -0700
+++ b/hotspot/src/share/vm/opto/superword.cpp	Tue Mar 24 12:19:47 2009 -0700
@@ -454,9 +454,13 @@
           // or need to run igvn.optimize() again before SLP
         } else if (out->is_Phi() && out->bottom_type() == Type::MEMORY && !in_bb(out)) {
           // Ditto.  Not sure what else to check further.
-        } else if (out->Opcode() == Op_StoreCM && out->in(4) == n) {
+        } else if (out->Opcode() == Op_StoreCM && out->in(MemNode::OopStore) == n) {
           // StoreCM has an input edge used as a precedence edge.
           // Maybe an issue when oop stores are vectorized.
+        } else if( out->is_MergeMem() && prev &&
+                   prev->Opcode() == Op_StoreCM && out == prev->in(MemNode::OopStore)) {
+          // Oop store is a MergeMem! This should not happen. Temporarily remove the assertion
+          // for this case because it could not be superwordized anyway.
         } else {
           assert(out == prev || prev == NULL, "no branches off of store slice");
         }
@@ -912,54 +916,175 @@
   }
 }
 
-//------------------------------co_locate_pack---------------------------
-// Within a pack, move stores down to the last executed store,
-// and move loads up to the first executed load.
+//-------------------------------remove_and_insert-------------------
+//remove "current" from its current position in the memory graph and insert
+//it after the appropriate insertion point (lip or uip)
+void SuperWord::remove_and_insert(MemNode *current, MemNode *prev, MemNode *lip,
+                                  Node *uip, Unique_Node_List &sched_before) {
+  Node* my_mem = current->in(MemNode::Memory);
+  _igvn.hash_delete(current);
+  _igvn.hash_delete(my_mem);
+
+  //remove current_store from its current position in the memmory graph
+  for (DUIterator i = current->outs(); current->has_out(i); i++) {
+    Node* use = current->out(i);
+    if (use->is_Mem()) {
+      assert(use->in(MemNode::Memory) == current, "must be");
+      _igvn.hash_delete(use);
+      if (use == prev) { // connect prev to my_mem
+        use->set_req(MemNode::Memory, my_mem);
+      } else if (sched_before.member(use)) {
+        _igvn.hash_delete(uip);
+        use->set_req(MemNode::Memory, uip);
+      } else {
+        _igvn.hash_delete(lip);
+        use->set_req(MemNode::Memory, lip);
+      }
+      _igvn._worklist.push(use);
+      --i; //deleted this edge; rescan position
+    }
+  }
+
+  bool sched_up = sched_before.member(current);
+  Node *insert_pt =  sched_up ?  uip : lip;
+  _igvn.hash_delete(insert_pt);
+
+  // all uses of insert_pt's memory state should use current's instead
+  for (DUIterator i = insert_pt->outs(); insert_pt->has_out(i); i++) {
+    Node* use = insert_pt->out(i);
+    if (use->is_Mem()) {
+      assert(use->in(MemNode::Memory) == insert_pt, "must be");
+      _igvn.hash_delete(use);
+      use->set_req(MemNode::Memory, current);
+      _igvn._worklist.push(use);
+      --i; //deleted this edge; rescan position
+    } else if (!sched_up && use->is_Phi() && use->bottom_type() == Type::MEMORY) {
+      uint pos; //lip (lower insert point) must be the last one in the memory slice
+      _igvn.hash_delete(use);
+      for (pos=1; pos < use->req(); pos++) {
+        if (use->in(pos) == insert_pt) break;
+      }
+      use->set_req(pos, current);
+      _igvn._worklist.push(use);
+      --i;
+    }
+  }
+
+  //connect current to insert_pt
+  current->set_req(MemNode::Memory, insert_pt);
+  _igvn._worklist.push(current);
+}
+
+//------------------------------co_locate_pack----------------------------------
+// To schedule a store pack, we need to move any sandwiched memory ops either before
+// or after the pack, based upon dependence information:
+// (1) If any store in the pack depends on the sandwiched memory op, the
+//     sandwiched memory op must be scheduled BEFORE the pack;
+// (2) If a sandwiched memory op depends on any store in the pack, the
+//     sandwiched memory op must be scheduled AFTER the pack;
+// (3) If a sandwiched memory op (say, memA) depends on another sandwiched
+//     memory op (say memB), memB must be scheduled before memA. So, if memA is
+//     scheduled before the pack, memB must also be scheduled before the pack;
+// (4) If there is no dependence restriction for a sandwiched memory op, we simply
+//     schedule this store AFTER the pack
+// (5) We know there is no dependence cycle, so there in no other case;
+// (6) Finally, all memory ops in another single pack should be moved in the same direction.
+//
+// To schedule a load pack: the memory edge of every loads in the pack must be
+// the same as the memory edge of the last executed load in the pack
 void SuperWord::co_locate_pack(Node_List* pk) {
   if (pk->at(0)->is_Store()) {
-    // Push Stores down towards last executed pack member
     MemNode* first     = executed_first(pk)->as_Mem();
     MemNode* last      = executed_last(pk)->as_Mem();
-    MemNode* insert_pt = last;
+    Unique_Node_List schedule_before_pack;
+    Unique_Node_List memops;
+
     MemNode* current   = last->in(MemNode::Memory)->as_Mem();
+    MemNode* previous  = last;
     while (true) {
       assert(in_bb(current), "stay in block");
+      memops.push(previous);
+      for (DUIterator i = current->outs(); current->has_out(i); i++) {
+        Node* use = current->out(i);
+        if (use->is_Mem() && use != previous)
+          memops.push(use);
+      }
+      if(current == first) break;
+      previous = current;
+      current  = current->in(MemNode::Memory)->as_Mem();
+    }
+
+    // determine which memory operations should be scheduled before the pack
+    for (uint i = 1; i < memops.size(); i++) {
+      Node *s1 = memops.at(i);
+      if (!in_pack(s1, pk) && !schedule_before_pack.member(s1)) {
+        for (uint j = 0; j< i; j++) {
+          Node *s2 = memops.at(j);
+          if (!independent(s1, s2)) {
+            if (in_pack(s2, pk) || schedule_before_pack.member(s2)) {
+              schedule_before_pack.push(s1); //s1 must be scheduled before
+              Node_List* mem_pk = my_pack(s1);
+              if (mem_pk != NULL) {
+                for (uint ii = 0; ii < mem_pk->size(); ii++) {
+                  Node* s = mem_pk->at(ii); // follow partner
+                  if (memops.member(s) && !schedule_before_pack.member(s))
+                    schedule_before_pack.push(s);
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+
+    MemNode* lower_insert_pt = last;
+    Node*    upper_insert_pt = first->in(MemNode::Memory);
+    previous                 = last; //previous store in pk
+    current                  = last->in(MemNode::Memory)->as_Mem();
+
+    //start scheduling from "last" to "first"
+    while (true) {
+      assert(in_bb(current), "stay in block");
+      assert(in_pack(previous, pk), "previous stays in pack");
       Node* my_mem = current->in(MemNode::Memory);
+
       if (in_pack(current, pk)) {
-        // Forward users of my memory state to my input memory state
+        // Forward users of my memory state (except "previous) to my input memory state
         _igvn.hash_delete(current);
-        _igvn.hash_delete(my_mem);
         for (DUIterator i = current->outs(); current->has_out(i); i++) {
           Node* use = current->out(i);
-          if (use->is_Mem()) {
+          if (use->is_Mem() && use != previous) {
             assert(use->in(MemNode::Memory) == current, "must be");
             _igvn.hash_delete(use);
-            use->set_req(MemNode::Memory, my_mem);
+            if (schedule_before_pack.member(use)) {
+              _igvn.hash_delete(upper_insert_pt);
+              use->set_req(MemNode::Memory, upper_insert_pt);
+            } else {
+              _igvn.hash_delete(lower_insert_pt);
+              use->set_req(MemNode::Memory, lower_insert_pt);
+            }
             _igvn._worklist.push(use);
             --i; // deleted this edge; rescan position
           }
         }
-        // put current immediately before insert_pt
-        current->set_req(MemNode::Memory, insert_pt->in(MemNode::Memory));
-        _igvn.hash_delete(insert_pt);
-        insert_pt->set_req(MemNode::Memory, current);
-        _igvn._worklist.push(insert_pt);
-        _igvn._worklist.push(current);
-        insert_pt = current;
+        previous = current;
+      } else { // !in_pack(current, pk) ==> a sandwiched store
+        remove_and_insert(current, previous, lower_insert_pt, upper_insert_pt, schedule_before_pack);
       }
+
       if (current == first) break;
       current = my_mem->as_Mem();
-    }
-  } else if (pk->at(0)->is_Load()) {
-    // Pull Loads up towards first executed pack member
-    LoadNode* first = executed_first(pk)->as_Load();
-    Node* first_mem = first->in(MemNode::Memory);
-    _igvn.hash_delete(first_mem);
-    // Give each load same memory state as first
+    } // end while
+  } else if (pk->at(0)->is_Load()) { //load
+    // all use the memory state that the last executed load uses
+    LoadNode* last_load  = executed_last(pk)->as_Load();
+    Node* last_mem       = last_load->in(MemNode::Memory);
+    _igvn.hash_delete(last_mem);
+    // Give each load same memory state as last
     for (uint i = 0; i < pk->size(); i++) {
       LoadNode* ld = pk->at(i)->as_Load();
       _igvn.hash_delete(ld);
-      ld->set_req(MemNode::Memory, first_mem);
+      ld->set_req(MemNode::Memory, last_mem);
       _igvn._worklist.push(ld);
     }
   }
--- a/hotspot/src/share/vm/opto/superword.hpp	Mon Mar 23 13:58:58 2009 -0700
+++ b/hotspot/src/share/vm/opto/superword.hpp	Tue Mar 24 12:19:47 2009 -0700
@@ -341,8 +341,11 @@
   void filter_packs();
   // Adjust the memory graph for the packed operations
   void schedule();
-  // Within a pack, move stores down to the last executed store,
-  // and move loads up to the first executed load.
+  // Remove "current" from its current position in the memory graph and insert
+  // it after the appropriate insert points (lip or uip);
+  void remove_and_insert(MemNode *current, MemNode *prev, MemNode *lip, Node *uip, Unique_Node_List &schd_before);
+  // Within a store pack, schedule stores together by moving out the sandwiched memory ops according
+  // to dependence info; and within a load pack, move loads down to the last executed load.
   void co_locate_pack(Node_List* p);
   // Convert packs into vector node operations
   void output();
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/6636138/Test1.java	Tue Mar 24 12:19:47 2009 -0700
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/**
+ * @test
+ * @bug 6636138
+ * @summary SuperWord::co_locate_pack(Node_List* p) generates memory graph that leads to memory order violation.
+ *
+ * @run main/othervm -server -Xbatch -XX:CompileOnly=Test1.init -XX:+UseSuperword Test1
+ */
+
+class Test1 {
+
+    public static void init(int src[], int [] dst, int[] ref) {
+        // initialize the arrays
+        for (int i =0; i<src.length; i++) {
+            src[i] =  i;
+            dst[i] = 2;      // yes, dst[i] needed(otherwise src[i] will be replaced with i)
+            ref[i] = src[i]; // src[i] depends on the store src[i]
+        }
+    }
+
+    public static void verify(int src[], int[] ref) {
+        // check whether src and ref are equal
+        for (int i = 0; i < src.length; i++) {
+            if (src[i] != ref[i]) {
+                System.out.println("Error: src and ref don't match at " + i);
+                System.exit(-1);
+            }
+        }
+    }
+
+    public static void test() {
+        int[] src = new int[34];
+        int[] dst = new int[34];
+        int[] ref = new int[34];
+
+        init(src, dst, ref);
+        verify(src, ref);
+    }
+
+    public static void main(String[] args) {
+        for (int i=0; i< 2000; i++) {
+            test();
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/6636138/Test2.java	Tue Mar 24 12:19:47 2009 -0700
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/**
+ * @test
+ * @bug 6636138
+ * @summary SuperWord::co_locate_pack(Node_List* p) generates memory graph that leads to memory order violation.
+ *
+ * @run main/othervm -server -Xbatch -XX:CompileOnly=Test2.shift -XX:+UseSuperword Test2
+ */
+
+class Test2 {
+
+    public static void init(int src[]) {
+        // Initialize the array
+        for (int i = 0; i < src.length; i++)
+            src[i] = i;
+    }
+
+   public static void shift(int src[]) {
+       //left-shift the array
+       for (int i = src.length-1; i > 0; i--){
+           int tmp  = src[i];
+           src[i]   = src[i-1];
+           src[i-1] = tmp;
+       }
+    }
+
+    public static void verify(int src[]) {
+        for (int i = 0; i < src.length; i++){
+            int value = (i-1 + src.length)%src.length; // correct value after shifting
+                if (src[i] != value) {
+                    System.out.println("Error: src["+i+"] should be "+ value + " instead of " + src[i]);
+                    System.exit(-1);
+                }
+        }
+    }
+
+    public static void test() {
+        int[] src = new int[10];
+        init(src);
+        shift(src);
+        verify(src);
+    }
+
+    public static void main(String[] args) {
+        for (int i=0; i< 2000; i++)
+            test();
+    }
+}