# HG changeset patch
# User mdoerr
# Date 1552040610 -3600
# Node ID feea57b38a1cc0cdc7cd76bddf42b2b77e0501a5
# Parent  889dae20c4c49ee3c1011c246f5d0c054824574e
8219584: Try to dump error file by thread which causes safepoint timeout
Reviewed-by: stuefe, dholmes, kvn

diff -r 889dae20c4c4 -r feea57b38a1c src/hotspot/os/posix/os_posix.cpp
--- a/src/hotspot/os/posix/os_posix.cpp	Thu Mar 07 16:15:43 2019 +0100
+++ b/src/hotspot/os/posix/os_posix.cpp	Fri Mar 08 11:23:30 2019 +0100
@@ -31,6 +31,7 @@
 #include "runtime/interfaceSupport.inline.hpp"
 #include "services/memTracker.hpp"
 #include "utilities/align.hpp"
+#include "utilities/events.hpp"
 #include "utilities/formatBuffer.hpp"
 #include "utilities/macros.hpp"
 #include "utilities/vmError.hpp"
@@ -1269,6 +1270,15 @@
   return true;
 }
 
+bool os::signal_sent_by_kill(const void* siginfo) {
+  const siginfo_t* const si = (const siginfo_t*)siginfo;
+  return si->si_code == SI_USER || si->si_code == SI_QUEUE
+#ifdef SI_TKILL
+         || si->si_code == SI_TKILL
+#endif
+         ;
+}
+
 void os::print_siginfo(outputStream* os, const void* si0) {
 
   const siginfo_t* const si = (const siginfo_t*) si0;
@@ -1299,7 +1309,7 @@
   // so it depends on the context which member to use. For synchronous error signals,
   // we print si_addr, unless the signal was sent by another process or thread, in
   // which case we print out pid or tid of the sender.
-  if (si->si_code == SI_USER || si->si_code == SI_QUEUE) {
+  if (signal_sent_by_kill(si)) {
     const pid_t pid = si->si_pid;
     os->print(", si_pid: %ld", (long) pid);
     if (IS_VALID_PID(pid)) {
@@ -1325,6 +1335,25 @@
 
 }
 
+bool os::signal_thread(Thread* thread, int sig, const char* reason) {
+  OSThread* osthread = thread->osthread();
+  if (osthread != NULL) {
+#if defined (SOLARIS)
+    // Note: we cannot use pthread_kill on Solaris - not because
+    // it's missing, but because we do not have the pthread_t id.
+    int status = thr_kill(osthread->thread_id(), sig);
+#else
+    int status = pthread_kill(osthread->pthread_id(), sig);
+#endif
+    if (status == 0) {
+      Events::log(Thread::current(), "sent signal %d to Thread " INTPTR_FORMAT " because %s.",
+                  sig, p2i(thread), reason);
+      return true;
+    }
+  }
+  return false;
+}
+
 int os::Posix::unblock_thread_signal_mask(const sigset_t *set) {
   return pthread_sigmask(SIG_UNBLOCK, set, NULL);
 }
diff -r 889dae20c4c4 -r feea57b38a1c src/hotspot/os/windows/os_windows.cpp
--- a/src/hotspot/os/windows/os_windows.cpp	Thu Mar 07 16:15:43 2019 +0100
+++ b/src/hotspot/os/windows/os_windows.cpp	Fri Mar 08 11:23:30 2019 +0100
@@ -1797,6 +1797,11 @@
   st->cr();
 }
 
+bool os::signal_sent_by_kill(const void* siginfo) {
+  // TODO: Is this possible?
+  return false;
+}
+
 void os::print_siginfo(outputStream *st, const void* siginfo) {
   const EXCEPTION_RECORD* const er = (EXCEPTION_RECORD*)siginfo;
   st->print("siginfo:");
@@ -1830,6 +1835,11 @@
   st->cr();
 }
 
+bool os::signal_thread(Thread* thread, int sig, const char* reason) {
+  // TODO: Can we kill thread?
+  return false;
+}
+
 void os::print_signal_handlers(outputStream* st, char* buf, size_t buflen) {
   // do nothing
 }
diff -r 889dae20c4c4 -r feea57b38a1c src/hotspot/share/runtime/globals.hpp
--- a/src/hotspot/share/runtime/globals.hpp	Thu Mar 07 16:15:43 2019 +0100
+++ b/src/hotspot/share/runtime/globals.hpp	Fri Mar 08 11:23:30 2019 +0100
@@ -367,7 +367,7 @@
           "Print out every time compilation is longer than "                \
           "a given threshold")                                              \
                                                                             \
-  develop(bool, SafepointALot, false,                                       \
+  diagnostic(bool, SafepointALot, false,                                    \
           "Generate a lot of safepoints. This works with "                  \
           "GuaranteedSafepointInterval")                                    \
                                                                             \
diff -r 889dae20c4c4 -r feea57b38a1c src/hotspot/share/runtime/os.hpp
--- a/src/hotspot/share/runtime/os.hpp	Thu Mar 07 16:15:43 2019 +0100
+++ b/src/hotspot/share/runtime/os.hpp	Fri Mar 08 11:23:30 2019 +0100
@@ -463,6 +463,9 @@
   static void pd_start_thread(Thread* thread);
   static void start_thread(Thread* thread);
 
+  // Sends sig to thread; reason is recorded in the event log. Returns true if successful.
+  static bool signal_thread(Thread* thread, int sig, const char* reason);
+
   static void free_thread(OSThread* osthread);
 
   // thread id on Linux/64bit is 64bit, on Windows and Solaris, it's 32bit
@@ -637,6 +640,8 @@
   static void print_environment_variables(outputStream* st, const char** env_list);
   static void print_context(outputStream* st, const void* context);
   static void print_register_info(outputStream* st, const void* context);
+  // Returns true if siginfo says the signal was sent explicitly (kill, sigqueue, tkill).
+  static bool signal_sent_by_kill(const void* siginfo);
   static void print_siginfo(outputStream* st, const void* siginfo);
   static void print_signal_handlers(outputStream* st, char* buf, size_t buflen);
   static void print_date_and_time(outputStream* st, char* buf, size_t buflen);
diff -r 889dae20c4c4 -r feea57b38a1c src/hotspot/share/runtime/safepoint.cpp
--- a/src/hotspot/share/runtime/safepoint.cpp	Thu Mar 07 16:15:43 2019 +0100
+++ b/src/hotspot/share/runtime/safepoint.cpp	Fri Mar 08 11:23:30 2019 +0100
@@ -902,6 +902,16 @@
   // To debug the long safepoint, specify both AbortVMOnSafepointTimeout &
   // ShowMessageBoxOnError.
   if (AbortVMOnSafepointTimeout) {
+    // Send the blocking thread a signal to terminate and write an error file.
+    for (JavaThreadIteratorWithHandle jtiwh; JavaThread *cur_thread = jtiwh.next(); ) {
+      if (cur_thread->safepoint_state()->is_running()) {
+        if (!os::signal_thread(cur_thread, SIGILL, "blocking a safepoint")) {
+          break; // Could not send signal. Report fatal error.
+        }
+        // Give cur_thread a chance to report the error and terminate the VM.
+        os::sleep(Thread::current(), 3000, false);
+      }
+    }
     fatal("Safepoint sync time longer than " INTX_FORMAT "ms detected when executing %s.",
           SafepointTimeoutDelay, VMThread::vm_operation()->name());
   }
diff -r 889dae20c4c4 -r feea57b38a1c src/hotspot/share/utilities/vmError.cpp
--- a/src/hotspot/share/utilities/vmError.cpp	Thu Mar 07 16:15:43 2019 +0100
+++ b/src/hotspot/share/utilities/vmError.cpp	Fri Mar 08 11:23:30 2019 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -522,6 +522,9 @@
        st->print("%s", buf);
        st->print(" (0x%x)", _id);                // signal number
        st->print(" at pc=" PTR_FORMAT, p2i(_pc));
+       if (_siginfo != NULL && os::signal_sent_by_kill(_siginfo)) {
+         st->print(" (sent by kill)");
+       }
      } else {
        if (should_report_bug(_id)) {
          st->print("Internal Error");
diff -r 889dae20c4c4 -r feea57b38a1c test/hotspot/jtreg/runtime/Safepoint/TestAbortVMOnSafepointTimeout.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/runtime/Safepoint/TestAbortVMOnSafepointTimeout.java	Fri Mar 08 11:23:30 2019 +0100
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2019, SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import jdk.test.lib.*;
+import jdk.test.lib.process.*;
+
+/*
+ * @test TestAbortVMOnSafepointTimeout
+ * @summary Check if VM can kill thread which doesn't reach safepoint.
+ * @bug 8219584
+ * @requires vm.compiler2.enabled
+ * @library /test/lib
+ * @modules java.base/jdk.internal.misc
+ *          java.management
+ */
+
+public class TestAbortVMOnSafepointTimeout {
+
+    public static void main(String[] args) throws Exception {
+        // Child VM mode: any argument selects the long running loop.
+        if (args.length > 0) {
+            int result = test_loop(3);
+            System.out.println("This message would occur after some time with result " + result);
+            return;
+        }
+
+        // Driver mode: launch the child VM and check how it terminated.
+        testWith(500, 500);
+    }
+
+    // Run by the child VM; the flags passed in testWith restrict compilation to this method.
+    static int test_loop(int x) {
+        int sum = 0;
+        if (x != 0) {
+            // Long running loop without safepoint.
+            for (int y = 1; y < Integer.MAX_VALUE; ++y) {
+                if (y % x == 0) ++sum;
+            }
+        }
+        return sum;
+    }
+
+    public static void testWith(int sfpt_interval, int timeout_delay) throws Exception {
+        ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(
+                "-XX:+UnlockDiagnosticVMOptions",
+                "-XX:+SafepointTimeout",
+                "-XX:+SafepointALot",
+                "-XX:+AbortVMOnSafepointTimeout",
+                "-XX:SafepointTimeoutDelay=" + timeout_delay,
+                "-XX:GuaranteedSafepointInterval=" + sfpt_interval,
+                "-XX:-TieredCompilation",
+                "-XX:-UseCountedLoopSafepoints",
+                "-XX:LoopStripMiningIter=0",
+                "-XX:LoopUnrollLimit=0",
+                "-XX:CompileCommand=compileonly,TestAbortVMOnSafepointTimeout::test_loop",
+                "-Xcomp",
+                "-XX:-CreateCoredumpOnCrash",
+                "-Xms64m",
+                "TestAbortVMOnSafepointTimeout",
+                "runTestLoop"
+        );
+
+        OutputAnalyzer output = new OutputAnalyzer(pb.start());
+        // Match literally: the expected strings contain regex metacharacters ('(', ')', '.').
+        if (Platform.isWindows()) {
+            output.shouldContain("Safepoint sync time longer than");
+        } else {
+            output.shouldContain("SIGILL");
+            if (Platform.isLinux()) {
+                output.shouldContain("(sent by kill)");
+            }
+            output.shouldContain("TestAbortVMOnSafepointTimeout.test_loop");
+        }
+        output.shouldNotHaveExitValue(0);
+    }
+}