8219584: Try to dump error file by thread which causes safepoint timeout
author mdoerr
Fri, 08 Mar 2019 11:23:30 +0100
changeset 54031 feea57b38a1c
parent 54030 889dae20c4c4
child 54032 ac09c2498c64
8219584: Try to dump error file by thread which causes safepoint timeout
Reviewed-by: stuefe, dholmes, kvn
src/hotspot/os/posix/os_posix.cpp
src/hotspot/os/windows/os_windows.cpp
src/hotspot/share/runtime/globals.hpp
src/hotspot/share/runtime/os.hpp
src/hotspot/share/runtime/safepoint.cpp
src/hotspot/share/utilities/vmError.cpp
test/hotspot/jtreg/runtime/Safepoint/TestAbortVMOnSafepointTimeout.java
--- a/src/hotspot/os/posix/os_posix.cpp	Thu Mar 07 16:15:43 2019 +0100
+++ b/src/hotspot/os/posix/os_posix.cpp	Fri Mar 08 11:23:30 2019 +0100
@@ -31,6 +31,7 @@
 #include "runtime/interfaceSupport.inline.hpp"
 #include "services/memTracker.hpp"
 #include "utilities/align.hpp"
+#include "utilities/events.hpp"
 #include "utilities/formatBuffer.hpp"
 #include "utilities/macros.hpp"
 #include "utilities/vmError.hpp"
@@ -1269,6 +1270,15 @@
   return true;
 }
 
+bool os::signal_sent_by_kill(const void* siginfo) { // siginfo is dereferenced unchecked; callers must pass a non-NULL siginfo_t
+  const siginfo_t* const si = (const siginfo_t*)siginfo;
+  return si->si_code == SI_USER || si->si_code == SI_QUEUE // si_codes documented for signals sent via kill()/sigqueue()
+#ifdef SI_TKILL
+         || si->si_code == SI_TKILL // only checked on platforms that define SI_TKILL
+#endif
+  ; // terminates the return expression on either side of the #ifdef
+}
+
 void os::print_siginfo(outputStream* os, const void* si0) {
 
   const siginfo_t* const si = (const siginfo_t*) si0;
@@ -1299,7 +1309,7 @@
   // so it depends on the context which member to use. For synchronous error signals,
   // we print si_addr, unless the signal was sent by another process or thread, in
   // which case we print out pid or tid of the sender.
-  if (si->si_code == SI_USER || si->si_code == SI_QUEUE) {
+  if (signal_sent_by_kill(si)) {
     const pid_t pid = si->si_pid;
     os->print(", si_pid: %ld", (long) pid);
     if (IS_VALID_PID(pid)) {
@@ -1325,6 +1335,25 @@
 
 }
 
+bool os::signal_thread(Thread* thread, int sig, const char* reason) { // 'reason' is only used in the event log message below
+  OSThread* osthread = thread->osthread();
+  if (osthread) { // without an OSThread there is nothing to signal; falls through to return false
+#if defined (SOLARIS)
+    // Note: we cannot use pthread_kill on Solaris - not because
+    // it's missing, but because we do not have the pthread_t id.
+    int status = thr_kill(osthread->thread_id(), sig);
+#else
+    int status = pthread_kill(osthread->pthread_id(), sig);
+#endif
+    if (status == 0) { // a status of 0 is treated as "signal delivered to the target thread"
+      Events::log(Thread::current(), "sent signal %d to Thread " INTPTR_FORMAT " because %s.",
+                  sig, p2i(thread), reason);
+      return true;
+    }
+  }
+  return false; // no OSThread, or the kill call returned a non-zero status
+}
+
 int os::Posix::unblock_thread_signal_mask(const sigset_t *set) {
   return pthread_sigmask(SIG_UNBLOCK, set, NULL);
 }
--- a/src/hotspot/os/windows/os_windows.cpp	Thu Mar 07 16:15:43 2019 +0100
+++ b/src/hotspot/os/windows/os_windows.cpp	Fri Mar 08 11:23:30 2019 +0100
@@ -1797,6 +1797,11 @@
   st->cr();
 }
 
+bool os::signal_sent_by_kill(const void* siginfo) {
+  // TODO: Is this possible on Windows? Until that is clarified, always report "not sent by kill".
+  return false;
+}
+
 void os::print_siginfo(outputStream *st, const void* siginfo) {
   const EXCEPTION_RECORD* const er = (EXCEPTION_RECORD*)siginfo;
   st->print("siginfo:");
@@ -1830,6 +1835,11 @@
   st->cr();
 }
 
+bool os::signal_thread(Thread* thread, int sig, const char* reason) {
+  // TODO: Can we signal/kill a thread on Windows? Not implemented here; always reports failure.
+  return false;
+}
+
 void os::print_signal_handlers(outputStream* st, char* buf, size_t buflen) {
   // do nothing
 }
--- a/src/hotspot/share/runtime/globals.hpp	Thu Mar 07 16:15:43 2019 +0100
+++ b/src/hotspot/share/runtime/globals.hpp	Fri Mar 08 11:23:30 2019 +0100
@@ -367,7 +367,7 @@
           "Print out every time compilation is longer than "                \
           "a given threshold")                                              \
                                                                             \
-  develop(bool, SafepointALot, false,                                       \
+  diagnostic(bool, SafepointALot, false,                                    \
           "Generate a lot of safepoints. This works with "                  \
           "GuaranteedSafepointInterval")                                    \
                                                                             \
--- a/src/hotspot/share/runtime/os.hpp	Thu Mar 07 16:15:43 2019 +0100
+++ b/src/hotspot/share/runtime/os.hpp	Fri Mar 08 11:23:30 2019 +0100
@@ -463,6 +463,9 @@
   static void pd_start_thread(Thread* thread);
   static void start_thread(Thread* thread);
 
+  // Sends signal 'sig' to 'thread'; 'reason' is used for the event log entry. Returns true if successful.
+  static bool signal_thread(Thread* thread, int sig, const char* reason);
+
   static void free_thread(OSThread* osthread);
 
   // thread id on Linux/64bit is 64bit, on Windows and Solaris, it's 32bit
@@ -637,6 +640,7 @@
   static void print_environment_variables(outputStream* st, const char** env_list);
   static void print_context(outputStream* st, const void* context);
   static void print_register_info(outputStream* st, const void* context);
+  static bool signal_sent_by_kill(const void* siginfo); // true if siginfo marks an explicitly sent signal
   static void print_siginfo(outputStream* st, const void* siginfo);
   static void print_signal_handlers(outputStream* st, char* buf, size_t buflen);
   static void print_date_and_time(outputStream* st, char* buf, size_t buflen);
--- a/src/hotspot/share/runtime/safepoint.cpp	Thu Mar 07 16:15:43 2019 +0100
+++ b/src/hotspot/share/runtime/safepoint.cpp	Fri Mar 08 11:23:30 2019 +0100
@@ -902,6 +902,16 @@
   // To debug the long safepoint, specify both AbortVMOnSafepointTimeout &
   // ShowMessageBoxOnError.
   if (AbortVMOnSafepointTimeout) {
+    // Send the blocking thread a signal to terminate and write an error file.
+    for (JavaThreadIteratorWithHandle jtiwh; JavaThread *cur_thread = jtiwh.next(); ) {
+      if (cur_thread->safepoint_state()->is_running()) {
+        if (!os::signal_thread(cur_thread, SIGILL, "blocking a safepoint")) {
+          break; // Could not send signal. Report fatal error.
+        }
+        // Give cur_thread a chance to report the error and terminate the VM.
+        os::sleep(Thread::current(), 3000, false);
+      }
+    }
     fatal("Safepoint sync time longer than " INTX_FORMAT "ms detected when executing %s.",
           SafepointTimeoutDelay, VMThread::vm_operation()->name());
   }
--- a/src/hotspot/share/utilities/vmError.cpp	Thu Mar 07 16:15:43 2019 +0100
+++ b/src/hotspot/share/utilities/vmError.cpp	Fri Mar 08 11:23:30 2019 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -522,6 +522,9 @@
        st->print("%s", buf);
        st->print(" (0x%x)", _id);                // signal number
        st->print(" at pc=" PTR_FORMAT, p2i(_pc));
+       if (_siginfo != NULL && os::signal_sent_by_kill(_siginfo)) {
+         st->print(" (sent by kill)");
+       }
      } else {
        if (should_report_bug(_id)) {
          st->print("Internal Error");
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/runtime/Safepoint/TestAbortVMOnSafepointTimeout.java	Fri Mar 08 11:23:30 2019 +0100
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2019, SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import jdk.test.lib.*;
+import jdk.test.lib.process.*;
+
+/*
+ * @test TestAbortVMOnSafepointTimeout
+ * @summary Check if VM can kill thread which doesn't reach safepoint.
+ * @bug 8219584
+ * @requires vm.compiler2.enabled
+ * @library /test/lib
+ * @modules java.base/jdk.internal.misc
+ *          java.management
+ */
+
+public class TestAbortVMOnSafepointTimeout {
+    // Without arguments this acts as the driver; with any argument it is the child VM running the loop.
+    public static void main(String[] args) throws Exception {
+        if (args.length > 0) {
+            int result = test_loop(3);
+            System.out.println("This message would occur after some time with result " + result);
+            return;
+        }
+
+        testWith(500, 500);
+    }
+    // Counts the multiples of x in [1, Integer.MAX_VALUE); returns 0 when x is 0.
+    static int test_loop(int x) {
+        int sum = 0;
+        if (x != 0) {
+            // Long running loop without safepoint.
+            for (int y = 1; y < Integer.MAX_VALUE; ++y) {
+                if (y % x == 0) ++sum;
+            }
+        }
+        return sum;
+    }
+    // Spawns a child VM with the given GuaranteedSafepointInterval / SafepointTimeoutDelay and expects it to abort.
+    public static void testWith(int sfpt_interval, int timeout_delay) throws Exception {
+        ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(
+                "-XX:+UnlockDiagnosticVMOptions",
+                "-XX:+SafepointTimeout",
+                "-XX:+SafepointALot",
+                "-XX:+AbortVMOnSafepointTimeout",
+                "-XX:SafepointTimeoutDelay=" + timeout_delay,
+                "-XX:GuaranteedSafepointInterval=" + sfpt_interval,
+                "-XX:-TieredCompilation",
+                "-XX:-UseCountedLoopSafepoints",
+                "-XX:LoopStripMiningIter=0",
+                "-XX:LoopUnrollLimit=0",
+                "-XX:CompileCommand=compileonly,TestAbortVMOnSafepointTimeout::test_loop",
+                "-Xcomp",
+                "-XX:-CreateCoredumpOnCrash",
+                "-Xms64m",
+                "TestAbortVMOnSafepointTimeout",
+                "runTestLoop"
+        );
+        // All expected strings are literal text: use shouldContain, not the regex-based shouldMatch.
+        OutputAnalyzer output = new OutputAnalyzer(pb.start());
+        if (Platform.isWindows()) {
+            output.shouldContain("Safepoint sync time longer than");
+        } else {
+            output.shouldContain("SIGILL");
+            if (Platform.isLinux()) {
+                output.shouldContain("(sent by kill)"); // as a regex the parentheses would be a group, not literal text
+            }
+            output.shouldContain("TestAbortVMOnSafepointTimeout.test_loop"); // as a regex the '.' would match any character
+        }
+        output.shouldNotHaveExitValue(0);
+    }
+}