8212933: Thread-SMR: requesting a VM operation whilst holding a ThreadsListHandle can cause deadlocks
author rehn
Wed, 31 Oct 2018 08:09:45 +0100
changeset 52341 2b58b8e1d28f
parent 52340 adb107c71a12
child 52342 9341b077bd55
8212933: Thread-SMR: requesting a VM operation whilst holding a ThreadsListHandle can cause deadlocks
Reviewed-by: eosterlund, dcubed, sspitsyn, dholmes
src/hotspot/share/runtime/handshake.cpp
src/hotspot/share/runtime/handshake.hpp
src/hotspot/share/runtime/thread.cpp
src/hotspot/share/runtime/thread.hpp
src/hotspot/share/runtime/threadSMR.cpp
test/hotspot/jtreg/runtime/handshake/HandshakeWalkSuspendExitTest.java
--- a/src/hotspot/share/runtime/handshake.cpp	Wed Oct 31 09:09:23 2018 +0100
+++ b/src/hotspot/share/runtime/handshake.cpp	Wed Oct 31 08:09:45 2018 +0100
@@ -41,7 +41,6 @@
 class HandshakeOperation: public StackObj {
 public:
   virtual void do_handshake(JavaThread* thread) = 0;
-  virtual void cancel_handshake(JavaThread* thread) = 0;
 };
 
 class HandshakeThreadsOperation: public HandshakeOperation {
@@ -51,8 +50,6 @@
 public:
   HandshakeThreadsOperation(ThreadClosure* cl) : _thread_cl(cl) {}
   void do_handshake(JavaThread* thread);
-  void cancel_handshake(JavaThread* thread) { _done.signal(); };
-
   bool thread_has_completed() { return _done.trywait(); }
 
 #ifdef ASSERT
@@ -121,15 +118,11 @@
     DEBUG_ONLY(_op->check_state();)
     TraceTime timer("Performing single-target operation (vmoperation doit)", TRACETIME_LOG(Info, handshake));
 
-    {
-      ThreadsListHandle tlh;
-      if (tlh.includes(_target)) {
-        set_handshake(_target);
-        _thread_alive = true;
-      }
-    }
-
-    if (!_thread_alive) {
+    ThreadsListHandle tlh;
+    if (tlh.includes(_target)) {
+      set_handshake(_target);
+      _thread_alive = true;
+    } else {
       return;
     }
 
@@ -147,20 +140,9 @@
       // We need to re-think this with SMR ThreadsList.
       // There is an assumption in the code that the Threads_lock should be
       // locked during certain phases.
-      MutexLockerEx ml(Threads_lock, Mutex::_no_safepoint_check_flag);
-      ThreadsListHandle tlh;
-      if (tlh.includes(_target)) {
-        // Warning _target's address might be re-used.
-        // handshake_process_by_vmthread will check the semaphore for us again.
-        // Since we can't have more then one handshake in flight a reuse of
-        // _target's address should be okay since the new thread will not have
-        // an operation.
+      {
+        MutexLockerEx ml(Threads_lock, Mutex::_no_safepoint_check_flag);
         _target->handshake_process_by_vmthread();
-      } else {
-        // We can't warn here since the thread does cancel_handshake after
-        // it has been removed from the ThreadsList. So we should just keep
-        // looping here until while below returns false. If we have a bug,
-        // then we hang here, which is good for debugging.
       }
     } while (!poll_for_completed_thread());
     DEBUG_ONLY(_op->check_state();)
@@ -179,8 +161,9 @@
     DEBUG_ONLY(_op->check_state();)
     TraceTime timer("Performing operation (vmoperation doit)", TRACETIME_LOG(Info, handshake));
 
+    JavaThreadIteratorWithHandle jtiwh;
     int number_of_threads_issued = 0;
-    for (JavaThreadIteratorWithHandle jtiwh; JavaThread *thr = jtiwh.next(); ) {
+    for (JavaThread *thr = jtiwh.next(); thr != NULL; thr = jtiwh.next()) {
       set_handshake(thr);
       number_of_threads_issued++;
     }
@@ -210,8 +193,9 @@
           // We need to re-think this with SMR ThreadsList.
           // There is an assumption in the code that the Threads_lock should
           // be locked during certain phases.
+          jtiwh.rewind();
           MutexLockerEx ml(Threads_lock, Mutex::_no_safepoint_check_flag);
-          for (JavaThreadIteratorWithHandle jtiwh; JavaThread *thr = jtiwh.next(); ) {
+          for (JavaThread *thr = jtiwh.next(); thr != NULL; thr = jtiwh.next()) {
             // A new thread on the ThreadsList will not have an operation,
             // hence it is skipped in handshake_process_by_vmthread.
             thr->handshake_process_by_vmthread();
@@ -262,7 +246,11 @@
   FormatBufferResource message("Operation for thread " PTR_FORMAT ", is_vm_thread: %s",
                                p2i(thread), BOOL_TO_STR(Thread::current()->is_VM_thread()));
   TraceTime timer(message, TRACETIME_LOG(Debug, handshake, task));
-  _thread_cl->do_thread(thread);
+
+  // Only actually execute the operation for non-terminated threads.
+  if (!thread->is_terminated()) {
+    _thread_cl->do_thread(thread);
+  }
 
   // Use the semaphore to inform the VM thread that we have completed the operation
   _done.signal();
@@ -306,12 +294,7 @@
 
 void HandshakeState::process_self_inner(JavaThread* thread) {
   assert(Thread::current() == thread, "should call from thread");
-
-  if (thread->is_terminated()) {
-    // If thread is not on threads list but armed, cancel.
-    thread->cancel_handshake();
-    return;
-  }
+  assert(!thread->is_terminated(), "should not be a terminated thread");
 
   CautiouslyPreserveExceptionMark pem(thread);
   ThreadInVMForHandshake tivm(thread);
@@ -327,16 +310,6 @@
   _semaphore.signal();
 }
 
-void HandshakeState::cancel_inner(JavaThread* thread) {
-  assert(Thread::current() == thread, "should call from thread");
-  assert(thread->thread_state() == _thread_in_vm, "must be in vm state");
-  HandshakeOperation* op = _operation;
-  clear_handshake(thread);
-  if (op != NULL) {
-    op->cancel_handshake(thread);
-  }
-}
-
 bool HandshakeState::vmthread_can_process_handshake(JavaThread* target) {
   // SafepointSynchronize::safepoint_safe() does not consider an externally
   // suspended thread to be safe. However, this function must be called with
@@ -344,7 +317,7 @@
   // resumed thus it is safe.
   assert(Threads_lock->owned_by_self(), "Not holding Threads_lock.");
   return SafepointSynchronize::safepoint_safe(target, target->thread_state()) ||
-         target->is_ext_suspended();
+         target->is_ext_suspended() || target->is_terminated();
 }
 
 static bool possibly_vmthread_can_process_handshake(JavaThread* target) {
@@ -355,6 +328,9 @@
   if (target->is_ext_suspended()) {
     return true;
   }
+  if (target->is_terminated()) {
+    return true;
+  }
   switch (target->thread_state()) {
   case _thread_in_native:
     // native threads are safe if they have no java stack or have walkable stack
@@ -381,6 +357,8 @@
 
 void HandshakeState::process_by_vmthread(JavaThread* target) {
   assert(Thread::current()->is_VM_thread(), "should call from vm thread");
+  // Threads_lock must be held here, but that is assert()ed in
+  // possibly_vmthread_can_process_handshake().
 
   if (!has_operation()) {
     // JT has already cleared its handshake
@@ -402,7 +380,6 @@
   // getting caught by the semaphore.
   if (vmthread_can_process_handshake(target)) {
     guarantee(!_semaphore.trywait(), "we should already own the semaphore");
-
     _operation->do_handshake(target);
     // Disarm after VM thread have executed the operation.
     clear_handshake(target);
--- a/src/hotspot/share/runtime/handshake.hpp	Wed Oct 31 09:09:23 2018 +0100
+++ b/src/hotspot/share/runtime/handshake.hpp	Wed Oct 31 08:09:45 2018 +0100
@@ -60,7 +60,6 @@
   bool vmthread_can_process_handshake(JavaThread* target);
 
   void clear_handshake(JavaThread* thread);
-  void cancel_inner(JavaThread* thread);
 
   void process_self_inner(JavaThread* thread);
 public:
@@ -72,19 +71,13 @@
     return _operation != NULL;
   }
 
-  void cancel(JavaThread* thread) {
-    if (!_thread_in_process_handshake) {
-      FlagSetting fs(_thread_in_process_handshake, true);
-      cancel_inner(thread);
-    }
-  }
-
   void process_by_self(JavaThread* thread) {
     if (!_thread_in_process_handshake) {
       FlagSetting fs(_thread_in_process_handshake, true);
       process_self_inner(thread);
     }
   }
+
   void process_by_vmthread(JavaThread* target);
 };
 
--- a/src/hotspot/share/runtime/thread.cpp	Wed Oct 31 09:09:23 2018 +0100
+++ b/src/hotspot/share/runtime/thread.cpp	Wed Oct 31 08:09:45 2018 +0100
@@ -4276,9 +4276,6 @@
   before_exit(thread);
 
   thread->exit(true);
-  // thread will never call smr_delete, instead of implicit cancel
-  // in wait_for_vm_thread_exit we do it explicit.
-  thread->cancel_handshake();
 
   // Stop VM thread.
   {
--- a/src/hotspot/share/runtime/thread.hpp	Wed Oct 31 09:09:23 2018 +0100
+++ b/src/hotspot/share/runtime/thread.hpp	Wed Oct 31 08:09:45 2018 +0100
@@ -1271,10 +1271,6 @@
     return _handshake.has_operation();
   }
 
-  void cancel_handshake() {
-    _handshake.cancel(this);
-  }
-
   void handshake_process_by_self() {
     _handshake.process_by_self(this);
   }
--- a/src/hotspot/share/runtime/threadSMR.cpp	Wed Oct 31 09:09:23 2018 +0100
+++ b/src/hotspot/share/runtime/threadSMR.cpp	Wed Oct 31 08:09:45 2018 +0100
@@ -989,11 +989,6 @@
     // Retry the whole scenario.
   }
 
-  if (ThreadLocalHandshakes) {
-    // The thread is about to be deleted so cancel any handshake.
-    thread->cancel_handshake();
-  }
-
   delete thread;
   if (EnableThreadSMRStatistics) {
     timer.stop();
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/runtime/handshake/HandshakeWalkSuspendExitTest.java	Wed Oct 31 08:09:45 2018 +0100
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/*
+ * @test HandshakeWalkSuspendExitTest
+ * @summary This test tries to stress the handshakes with new and exiting threads while suspending them.
+ * @library /testlibrary /test/lib
+ * @build HandshakeWalkSuspendExitTest
+ * @run driver ClassFileInstaller sun.hotspot.WhiteBox
+ *                              sun.hotspot.WhiteBox$WhiteBoxPermission
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI HandshakeWalkSuspendExitTest
+ */
+
+import jdk.test.lib.Asserts;
+import sun.hotspot.WhiteBox;
+
+/**
+ * Stress test for thread-local handshakes racing with thread suspension
+ * and thread exit.
+ *
+ * A fixed set of worker threads repeatedly requests handshake stack walks
+ * (of all threads and of single worker threads) through the WhiteBox API
+ * while suspending and resuming the workers at even indices. Meanwhile the
+ * main thread starts a batch of short-lived threads that exit immediately.
+ */
+public class HandshakeWalkSuspendExitTest  implements Runnable {
+
+    // Number of worker threads executing run().
+    static final int _test_threads = 8;
+    // Number of short-lived threads started by main().
+    static final int _test_exit_threads = 128;
+    // Worker threads; fully populated by main() before any worker is started.
+    static Thread[] _threads = new Thread[_test_threads];
+    // Set by main() to ask the workers to leave their loop.
+    static volatile boolean exit_now = false;
+    // Released once per worker loop iteration; main() acquires _test_threads
+    // permits before starting the exiting threads.
+    static java.util.concurrent.Semaphore _sem = new java.util.concurrent.Semaphore(0);
+
+    /**
+     * Worker loop: until exit_now is set, alternates all-thread and
+     * single-thread handshake stack walks with suspend/resume of the
+     * even-indexed workers.
+     */
+    @Override
+    public void run() {
+        WhiteBox wb = WhiteBox.getWhiteBox();
+        while (!exit_now) {
+            _sem.release();
+            // We only suspend threads on even index and not ourself.
+            // Otherwise we can accidentally suspend all threads.
+            for (int i = 0; i < _threads.length; i += 2) {
+                wb.handshakeWalkStack(null /* ignored */, true /* stackwalk all threads */);
+                if (Thread.currentThread() != _threads[i]) {
+                    _threads[i].suspend();
+                    _threads[i].resume();
+                }
+            }
+            for (int i = 0; i < _threads.length; i += 2) {
+                wb.handshakeWalkStack(_threads[i] /* thread to stackwalk */, false /* stackwalk one thread */);
+                if (Thread.currentThread() != _threads[i]) {
+                    _threads[i].suspend();
+                    _threads[i].resume();
+                }
+            }
+        }
+    }
+
+    /**
+     * Starts the workers, waits for _test_threads loop iterations to be
+     * signalled, starts the exiting threads, then stops and joins everything.
+     */
+    public static void main(String... args) throws Exception {
+        HandshakeWalkSuspendExitTest test = new HandshakeWalkSuspendExitTest();
+
+        // Create every worker before starting any of them: run() dereferences
+        // all even-indexed entries of _threads, so a worker started while the
+        // array is still being filled could hit a null slot and die with a
+        // NullPointerException. Thread.start() also guarantees that the
+        // completed array is visible to each worker.
+        for (int i = 0; i < _threads.length; i++) {
+            _threads[i] = new Thread(test);
+        }
+        for (int i = 0; i < _threads.length; i++) {
+            _threads[i].start();
+        }
+        for (int i = 0; i < _test_threads; i++) {
+            _sem.acquire();
+        }
+        Thread[] exit_threads = new Thread[_test_exit_threads];
+        for (int i = 0; i < _test_exit_threads; i++) {
+            exit_threads[i] = new Thread(new Runnable() { public void run() {} });
+            exit_threads[i].start();
+        }
+        exit_now = true;
+        for (int i = 0; i < _threads.length; i++) {
+            _threads[i].join();
+        }
+        for (int i = 0; i < exit_threads.length; i++) {
+            exit_threads[i].join();
+        }
+    }
+}