8219584: Try to dump error file by thread which causes safepoint timeout
Reviewed-by: stuefe, dholmes, kvn
--- a/src/hotspot/os/posix/os_posix.cpp Thu Mar 07 16:15:43 2019 +0100
+++ b/src/hotspot/os/posix/os_posix.cpp Fri Mar 08 11:23:30 2019 +0100
@@ -31,6 +31,7 @@
#include "runtime/interfaceSupport.inline.hpp"
#include "services/memTracker.hpp"
#include "utilities/align.hpp"
+#include "utilities/events.hpp"
#include "utilities/formatBuffer.hpp"
#include "utilities/macros.hpp"
#include "utilities/vmError.hpp"
@@ -1269,6 +1270,15 @@
return true;
}
+bool os::signal_sent_by_kill(const void* siginfo) {
+  // Decide purely from si_code: SI_USER, SI_QUEUE and (where defined) SI_TKILL count as "kill".
+  const int code = static_cast<const siginfo_t*>(siginfo)->si_code;
+#ifdef SI_TKILL
+  if (code == SI_TKILL) return true;
+#endif
+  return code == SI_USER || code == SI_QUEUE;
+}
+
void os::print_siginfo(outputStream* os, const void* si0) {
const siginfo_t* const si = (const siginfo_t*) si0;
@@ -1299,7 +1309,7 @@
// so it depends on the context which member to use. For synchronous error signals,
// we print si_addr, unless the signal was sent by another process or thread, in
// which case we print out pid or tid of the sender.
- if (si->si_code == SI_USER || si->si_code == SI_QUEUE) {
+ if (signal_sent_by_kill(si)) {
const pid_t pid = si->si_pid;
os->print(", si_pid: %ld", (long) pid);
if (IS_VALID_PID(pid)) {
@@ -1325,6 +1335,25 @@
}
+bool os::signal_thread(Thread* thread, int sig, const char* reason) {
+  // Deliver sig to thread's OS-level thread; on success log an event and return true.
+  OSThread* const target = thread->osthread();
+  if (target == NULL) {
+    return false;
+  }
+#if defined (SOLARIS)
+  // Solaris: pthread_kill exists, but we only have the thread id here, not a pthread_t.
+  const bool sent = (thr_kill(target->thread_id(), sig) == 0);
+#else
+  const bool sent = (pthread_kill(target->pthread_id(), sig) == 0);
+#endif
+  if (sent) {
+    Events::log(Thread::current(), "sent signal %d to Thread " INTPTR_FORMAT " because %s.",
+                sig, p2i(thread), reason);
+  }
+  return sent;
+}
+
int os::Posix::unblock_thread_signal_mask(const sigset_t *set) {
return pthread_sigmask(SIG_UNBLOCK, set, NULL);
}
--- a/src/hotspot/os/windows/os_windows.cpp Thu Mar 07 16:15:43 2019 +0100
+++ b/src/hotspot/os/windows/os_windows.cpp Fri Mar 08 11:23:30 2019 +0100
@@ -1797,6 +1797,11 @@
st->cr();
}
+bool os::signal_sent_by_kill(const void* siginfo) {
+ // TODO: Is this possible? Until that is clarified, never report a kill-sent signal on Windows.
+ return false;
+}
+
void os::print_siginfo(outputStream *st, const void* siginfo) {
const EXCEPTION_RECORD* const er = (EXCEPTION_RECORD*)siginfo;
st->print("siginfo:");
@@ -1830,6 +1835,11 @@
st->cr();
}
+bool os::signal_thread(Thread* thread, int sig, const char* reason) {
+ // TODO: Can we kill thread? Not implemented on Windows: nothing is sent, so report failure.
+ return false;
+}
+
void os::print_signal_handlers(outputStream* st, char* buf, size_t buflen) {
// do nothing
}
--- a/src/hotspot/share/runtime/globals.hpp Thu Mar 07 16:15:43 2019 +0100
+++ b/src/hotspot/share/runtime/globals.hpp Fri Mar 08 11:23:30 2019 +0100
@@ -367,7 +367,7 @@
"Print out every time compilation is longer than " \
"a given threshold") \
\
- develop(bool, SafepointALot, false, \
+ diagnostic(bool, SafepointALot, false, \
"Generate a lot of safepoints. This works with " \
"GuaranteedSafepointInterval") \
\
--- a/src/hotspot/share/runtime/os.hpp Thu Mar 07 16:15:43 2019 +0100
+++ b/src/hotspot/share/runtime/os.hpp Fri Mar 08 11:23:30 2019 +0100
@@ -463,6 +463,9 @@
static void pd_start_thread(Thread* thread);
static void start_thread(Thread* thread);
+ // Tries to send signal sig to thread; reason is only used for logging. Returns true if successful.
+ static bool signal_thread(Thread* thread, int sig, const char* reason);
+
static void free_thread(OSThread* osthread);
// thread id on Linux/64bit is 64bit, on Windows and Solaris, it's 32bit
@@ -637,6 +640,7 @@
static void print_environment_variables(outputStream* st, const char** env_list);
static void print_context(outputStream* st, const void* context);
static void print_register_info(outputStream* st, const void* context);
+ static bool signal_sent_by_kill(const void* siginfo);
static void print_siginfo(outputStream* st, const void* siginfo);
static void print_signal_handlers(outputStream* st, char* buf, size_t buflen);
static void print_date_and_time(outputStream* st, char* buf, size_t buflen);
--- a/src/hotspot/share/runtime/safepoint.cpp Thu Mar 07 16:15:43 2019 +0100
+++ b/src/hotspot/share/runtime/safepoint.cpp Fri Mar 08 11:23:30 2019 +0100
@@ -902,6 +902,16 @@
// To debug the long safepoint, specify both AbortVMOnSafepointTimeout &
// ShowMessageBoxOnError.
if (AbortVMOnSafepointTimeout) {
+ // Send the blocking thread a signal to terminate and write an error file.
+ for (JavaThreadIteratorWithHandle jtiwh; JavaThread *cur_thread = jtiwh.next(); ) {
+ if (cur_thread->safepoint_state()->is_running()) {
+ if (!os::signal_thread(cur_thread, SIGILL, "blocking a safepoint")) {
+ break; // Could not send signal. Report fatal error.
+ }
+ // Give cur_thread a chance to report the error and terminate the VM.
+ os::sleep(Thread::current(), 3000, false);
+ }
+ }
fatal("Safepoint sync time longer than " INTX_FORMAT "ms detected when executing %s.",
SafepointTimeoutDelay, VMThread::vm_operation()->name());
}
--- a/src/hotspot/share/utilities/vmError.cpp Thu Mar 07 16:15:43 2019 +0100
+++ b/src/hotspot/share/utilities/vmError.cpp Fri Mar 08 11:23:30 2019 +0100
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -522,6 +522,9 @@
st->print("%s", buf);
st->print(" (0x%x)", _id); // signal number
st->print(" at pc=" PTR_FORMAT, p2i(_pc));
+ if (_siginfo != NULL && os::signal_sent_by_kill(_siginfo)) {
+ st->print(" (sent by kill)");
+ }
} else {
if (should_report_bug(_id)) {
st->print("Internal Error");
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/runtime/Safepoint/TestAbortVMOnSafepointTimeout.java Fri Mar 08 11:23:30 2019 +0100
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2019, SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import jdk.test.lib.*;
+import jdk.test.lib.process.*;
+
+/*
+ * @test TestAbortVMOnSafepointTimeout
+ * @summary Check if VM can kill thread which doesn't reach safepoint.
+ * @bug 8219584
+ * @requires vm.compiler2.enabled
+ * @library /test/lib
+ * @modules java.base/jdk.internal.misc
+ * java.management
+ */
+
+public class TestAbortVMOnSafepointTimeout {
+    // No arguments: driver that launches the child VM. Any argument: child VM that runs test_loop.
+    public static void main(String[] args) throws Exception {
+        if (args.length > 0) {
+            int result = test_loop(3);
+            System.out.println("This message would occur after some time with result " + result);
+            return;
+        }
+
+        testWith(500, 500);
+    }
+
+    static int test_loop(int x) {
+        int sum = 0;
+        if (x != 0) {
+            // Long running loop without safepoint.
+            for (int y = 1; y < Integer.MAX_VALUE; ++y) {
+                if (y % x == 0) ++sum;
+            }
+        }
+        return sum;
+    }
+
+    // Runs a child VM set up to hit the safepoint timeout and checks how it aborted.
+    public static void testWith(int sfpt_interval, int timeout_delay) throws Exception {
+        ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(
+                "-XX:+UnlockDiagnosticVMOptions",
+                "-XX:+SafepointTimeout",
+                "-XX:+SafepointALot",
+                "-XX:+AbortVMOnSafepointTimeout",
+                "-XX:SafepointTimeoutDelay=" + timeout_delay,
+                "-XX:GuaranteedSafepointInterval=" + sfpt_interval,
+                "-XX:-TieredCompilation",
+                "-XX:-UseCountedLoopSafepoints",
+                "-XX:LoopStripMiningIter=0",
+                "-XX:LoopUnrollLimit=0",
+                "-XX:CompileCommand=compileonly,TestAbortVMOnSafepointTimeout::test_loop",
+                "-Xcomp",
+                "-XX:-CreateCoredumpOnCrash",
+                "-Xms64m",
+                "TestAbortVMOnSafepointTimeout", "runTestLoop" // any argument selects the child role
+        );
+        // The expected strings are literal VM output: match them as substrings, not as regexes.
+        OutputAnalyzer output = new OutputAnalyzer(pb.start());
+        if (Platform.isWindows()) {
+            output.shouldContain("Safepoint sync time longer than");
+        } else {
+            output.shouldContain("SIGILL");
+            if (Platform.isLinux()) {
+                output.shouldContain("(sent by kill)");
+            }
+            output.shouldContain("TestAbortVMOnSafepointTimeout.test_loop");
+        }
+        output.shouldNotHaveExitValue(0);
+    }
+}