8212933: Thread-SMR: requesting a VM operation whilst holding a ThreadsListHandle can cause deadlocks
Reviewed-by: eosterlund, dcubed, sspitsyn, dholmes
--- a/src/hotspot/share/runtime/handshake.cpp Wed Oct 31 09:09:23 2018 +0100
+++ b/src/hotspot/share/runtime/handshake.cpp Wed Oct 31 08:09:45 2018 +0100
@@ -41,7 +41,6 @@
class HandshakeOperation: public StackObj {
public:
virtual void do_handshake(JavaThread* thread) = 0;
- virtual void cancel_handshake(JavaThread* thread) = 0;
};
class HandshakeThreadsOperation: public HandshakeOperation {
@@ -51,8 +50,6 @@
public:
HandshakeThreadsOperation(ThreadClosure* cl) : _thread_cl(cl) {}
void do_handshake(JavaThread* thread);
- void cancel_handshake(JavaThread* thread) { _done.signal(); };
-
bool thread_has_completed() { return _done.trywait(); }
#ifdef ASSERT
@@ -121,15 +118,11 @@
DEBUG_ONLY(_op->check_state();)
TraceTime timer("Performing single-target operation (vmoperation doit)", TRACETIME_LOG(Info, handshake));
- {
- ThreadsListHandle tlh;
- if (tlh.includes(_target)) {
- set_handshake(_target);
- _thread_alive = true;
- }
- }
-
- if (!_thread_alive) {
+ ThreadsListHandle tlh;
+ if (tlh.includes(_target)) {
+ set_handshake(_target);
+ _thread_alive = true;
+ } else {
return;
}
@@ -147,20 +140,9 @@
// We need to re-think this with SMR ThreadsList.
// There is an assumption in the code that the Threads_lock should be
// locked during certain phases.
- MutexLockerEx ml(Threads_lock, Mutex::_no_safepoint_check_flag);
- ThreadsListHandle tlh;
- if (tlh.includes(_target)) {
- // Warning _target's address might be re-used.
- // handshake_process_by_vmthread will check the semaphore for us again.
- // Since we can't have more then one handshake in flight a reuse of
- // _target's address should be okay since the new thread will not have
- // an operation.
+ {
+ MutexLockerEx ml(Threads_lock, Mutex::_no_safepoint_check_flag);
_target->handshake_process_by_vmthread();
- } else {
- // We can't warn here since the thread does cancel_handshake after
- // it has been removed from the ThreadsList. So we should just keep
- // looping here until while below returns false. If we have a bug,
- // then we hang here, which is good for debugging.
}
} while (!poll_for_completed_thread());
DEBUG_ONLY(_op->check_state();)
@@ -179,8 +161,9 @@
DEBUG_ONLY(_op->check_state();)
TraceTime timer("Performing operation (vmoperation doit)", TRACETIME_LOG(Info, handshake));
+ JavaThreadIteratorWithHandle jtiwh;
int number_of_threads_issued = 0;
- for (JavaThreadIteratorWithHandle jtiwh; JavaThread *thr = jtiwh.next(); ) {
+ for (JavaThread *thr = jtiwh.next(); thr != NULL; thr = jtiwh.next()) {
set_handshake(thr);
number_of_threads_issued++;
}
@@ -210,8 +193,9 @@
// We need to re-think this with SMR ThreadsList.
// There is an assumption in the code that the Threads_lock should
// be locked during certain phases.
+ jtiwh.rewind();
MutexLockerEx ml(Threads_lock, Mutex::_no_safepoint_check_flag);
- for (JavaThreadIteratorWithHandle jtiwh; JavaThread *thr = jtiwh.next(); ) {
+ for (JavaThread *thr = jtiwh.next(); thr != NULL; thr = jtiwh.next()) {
// A new thread on the ThreadsList will not have an operation,
// hence it is skipped in handshake_process_by_vmthread.
thr->handshake_process_by_vmthread();
@@ -262,7 +246,11 @@
FormatBufferResource message("Operation for thread " PTR_FORMAT ", is_vm_thread: %s",
p2i(thread), BOOL_TO_STR(Thread::current()->is_VM_thread()));
TraceTime timer(message, TRACETIME_LOG(Debug, handshake, task));
- _thread_cl->do_thread(thread);
+
+ // Only actually execute the operation for non terminated threads.
+ if (!thread->is_terminated()) {
+ _thread_cl->do_thread(thread);
+ }
// Use the semaphore to inform the VM thread that we have completed the operation
_done.signal();
@@ -306,12 +294,7 @@
void HandshakeState::process_self_inner(JavaThread* thread) {
assert(Thread::current() == thread, "should call from thread");
-
- if (thread->is_terminated()) {
- // If thread is not on threads list but armed, cancel.
- thread->cancel_handshake();
- return;
- }
+ assert(!thread->is_terminated(), "should not be a terminated thread");
CautiouslyPreserveExceptionMark pem(thread);
ThreadInVMForHandshake tivm(thread);
@@ -327,16 +310,6 @@
_semaphore.signal();
}
-void HandshakeState::cancel_inner(JavaThread* thread) {
- assert(Thread::current() == thread, "should call from thread");
- assert(thread->thread_state() == _thread_in_vm, "must be in vm state");
- HandshakeOperation* op = _operation;
- clear_handshake(thread);
- if (op != NULL) {
- op->cancel_handshake(thread);
- }
-}
-
bool HandshakeState::vmthread_can_process_handshake(JavaThread* target) {
// SafepointSynchronize::safepoint_safe() does not consider an externally
// suspended thread to be safe. However, this function must be called with
@@ -344,7 +317,7 @@
// resumed thus it is safe.
assert(Threads_lock->owned_by_self(), "Not holding Threads_lock.");
return SafepointSynchronize::safepoint_safe(target, target->thread_state()) ||
- target->is_ext_suspended();
+ target->is_ext_suspended() || target->is_terminated();
}
static bool possibly_vmthread_can_process_handshake(JavaThread* target) {
@@ -355,6 +328,9 @@
if (target->is_ext_suspended()) {
return true;
}
+ if (target->is_terminated()) {
+ return true;
+ }
switch (target->thread_state()) {
case _thread_in_native:
// native threads are safe if they have no java stack or have walkable stack
@@ -381,6 +357,8 @@
void HandshakeState::process_by_vmthread(JavaThread* target) {
assert(Thread::current()->is_VM_thread(), "should call from vm thread");
+ // Threads_lock must be held here, but that is assert()ed in
+ // possibly_vmthread_can_process_handshake().
if (!has_operation()) {
// JT has already cleared its handshake
@@ -402,7 +380,6 @@
// getting caught by the semaphore.
if (vmthread_can_process_handshake(target)) {
guarantee(!_semaphore.trywait(), "we should already own the semaphore");
-
_operation->do_handshake(target);
// Disarm after VM thread have executed the operation.
clear_handshake(target);
--- a/src/hotspot/share/runtime/handshake.hpp Wed Oct 31 09:09:23 2018 +0100
+++ b/src/hotspot/share/runtime/handshake.hpp Wed Oct 31 08:09:45 2018 +0100
@@ -60,7 +60,6 @@
bool vmthread_can_process_handshake(JavaThread* target);
void clear_handshake(JavaThread* thread);
- void cancel_inner(JavaThread* thread);
void process_self_inner(JavaThread* thread);
public:
@@ -72,19 +71,13 @@
return _operation != NULL;
}
- void cancel(JavaThread* thread) {
- if (!_thread_in_process_handshake) {
- FlagSetting fs(_thread_in_process_handshake, true);
- cancel_inner(thread);
- }
- }
-
void process_by_self(JavaThread* thread) {
if (!_thread_in_process_handshake) {
FlagSetting fs(_thread_in_process_handshake, true);
process_self_inner(thread);
}
}
+
void process_by_vmthread(JavaThread* target);
};
--- a/src/hotspot/share/runtime/thread.cpp Wed Oct 31 09:09:23 2018 +0100
+++ b/src/hotspot/share/runtime/thread.cpp Wed Oct 31 08:09:45 2018 +0100
@@ -4276,9 +4276,6 @@
before_exit(thread);
thread->exit(true);
- // thread will never call smr_delete, instead of implicit cancel
- // in wait_for_vm_thread_exit we do it explicit.
- thread->cancel_handshake();
// Stop VM thread.
{
--- a/src/hotspot/share/runtime/thread.hpp Wed Oct 31 09:09:23 2018 +0100
+++ b/src/hotspot/share/runtime/thread.hpp Wed Oct 31 08:09:45 2018 +0100
@@ -1271,10 +1271,6 @@
return _handshake.has_operation();
}
- void cancel_handshake() {
- _handshake.cancel(this);
- }
-
void handshake_process_by_self() {
_handshake.process_by_self(this);
}
--- a/src/hotspot/share/runtime/threadSMR.cpp Wed Oct 31 09:09:23 2018 +0100
+++ b/src/hotspot/share/runtime/threadSMR.cpp Wed Oct 31 08:09:45 2018 +0100
@@ -989,11 +989,6 @@
// Retry the whole scenario.
}
- if (ThreadLocalHandshakes) {
- // The thread is about to be deleted so cancel any handshake.
- thread->cancel_handshake();
- }
-
delete thread;
if (EnableThreadSMRStatistics) {
timer.stop();
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/runtime/handshake/HandshakeWalkSuspendExitTest.java Wed Oct 31 08:09:45 2018 +0100
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/*
+ * @test HandshakeWalkSuspendExitTest
+ * @summary This test tries to stress the handshakes with new and exiting threads while suspending them.
+ * @library /testlibrary /test/lib
+ * @build HandshakeWalkSuspendExitTest
+ * @run driver ClassFileInstaller sun.hotspot.WhiteBox
+ * sun.hotspot.WhiteBox$WhiteBoxPermission
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI HandshakeWalkSuspendExitTest
+ */
+
+import jdk.test.lib.Asserts;
+import sun.hotspot.WhiteBox;
+
+public class HandshakeWalkSuspendExitTest implements Runnable {
+
+ static final int _test_threads = 8;
+ static final int _test_exit_threads = 128;
+ static Thread[] _threads = new Thread[_test_threads];
+ static volatile boolean exit_now = false;
+ static java.util.concurrent.Semaphore _sem = new java.util.concurrent.Semaphore(0);
+
+ @Override
+ public void run() {
+ WhiteBox wb = WhiteBox.getWhiteBox();
+ while (!exit_now) {
+ _sem.release();
+ // We only suspend threads on even index and not ourself.
+ // Otherwise we can accidentially suspend all threads.
+ for (int i = 0; i < _threads.length; i += 2) {
+ wb.handshakeWalkStack(null /* ignored */, true /* stackwalk all threads */);
+ if (Thread.currentThread() != _threads[i]) {
+ _threads[i].suspend();
+ _threads[i].resume();
+ }
+ }
+ for (int i = 0; i < _threads.length; i += 2) {
+ wb.handshakeWalkStack(_threads[i] /* thread to stackwalk */, false /* stackwalk one thread */);
+ if (Thread.currentThread() != _threads[i]) {
+ _threads[i].suspend();
+ _threads[i].resume();
+ }
+ }
+ }
+ }
+
+ public static void main(String... args) throws Exception {
+ HandshakeWalkSuspendExitTest test = new HandshakeWalkSuspendExitTest();
+
+ for (int i = 0; i < _threads.length; i++) {
+ _threads[i] = new Thread(test);
+ _threads[i].start();
+ }
+ for (int i = 0; i < _test_threads; i++) {
+ _sem.acquire();
+ }
+ Thread[] exit_threads = new Thread[_test_exit_threads];
+ for (int i = 0; i < _test_exit_threads; i++) {
+ exit_threads[i] = new Thread(new Runnable() { public void run() {} });
+ exit_threads[i].start();
+ }
+ exit_now = true;
+ for (int i = 0; i < _threads.length; i++) {
+ _threads[i].join();
+ }
+ for (int i = 0; i < exit_threads.length; i++) {
+ exit_threads[i].join();
+ }
+ }
+}