8020701: Avoid crashes in WatcherThread
authorrbackman
Wed, 17 Jul 2013 13:48:15 +0200
changeset 18943 7d0ef675e808
parent 18942 705506c1bf49
child 18945 1225c36dacd3
child 18946 65a870954fd7
8020701: Avoid crashes in WatcherThread Reviewed-by: acorn, dcubed, dsimms
hotspot/src/os/posix/vm/os_posix.cpp
hotspot/src/os/posix/vm/os_posix.hpp
hotspot/src/os/windows/vm/os_windows.cpp
hotspot/src/os/windows/vm/os_windows.hpp
hotspot/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp
hotspot/src/os_cpu/linux_sparc/vm/os_linux_sparc.cpp
hotspot/src/os_cpu/linux_x86/vm/os_linux_x86.cpp
hotspot/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.cpp
hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp
hotspot/src/share/vm/runtime/mutex.cpp
hotspot/src/share/vm/runtime/os.cpp
hotspot/src/share/vm/runtime/os.hpp
hotspot/src/share/vm/runtime/thread.cpp
hotspot/src/share/vm/runtime/thread.hpp
--- a/hotspot/src/os/posix/vm/os_posix.cpp	Thu Jul 18 06:47:15 2013 -0400
+++ b/hotspot/src/os/posix/vm/os_posix.cpp	Wed Jul 17 13:48:15 2013 +0200
@@ -259,3 +259,52 @@
 FILE* os::open(int fd, const char* mode) {
   return ::fdopen(fd, mode);
 }
+
+os::WatcherThreadCrashProtection::WatcherThreadCrashProtection() {
+  assert(Thread::current()->is_Watcher_thread(), "Must be WatcherThread");
+}
+
+/*
+ * See the caveats for this class in os_posix.hpp
+ * Protects the callback call so that SIGSEGV / SIGBUS jumps back into this
+ * method and returns false. If none of the signals are raised, returns true.
+ * The callback is supposed to provide the method that should be protected.
+ */
+bool os::WatcherThreadCrashProtection::call(os::CrashProtectionCallback& cb) {
+  assert(Thread::current()->is_Watcher_thread(), "Only for WatcherThread");
+  assert(!WatcherThread::watcher_thread()->has_crash_protection(),
+      "crash_protection already set?");
+
+  if (sigsetjmp(_jmpbuf, 1) == 0) {
+    // make sure we can see in the signal handler that we have crash protection
+    // installed
+    WatcherThread::watcher_thread()->set_crash_protection(this);
+    cb.call();
+    // and clear the crash protection
+    WatcherThread::watcher_thread()->set_crash_protection(NULL);
+    return true;
+  }
+  // this happens when we siglongjmp() back
+  WatcherThread::watcher_thread()->set_crash_protection(NULL);
+  return false;
+}
+
+void os::WatcherThreadCrashProtection::restore() {
+  assert(WatcherThread::watcher_thread()->has_crash_protection(),
+      "must have crash protection");
+
+  siglongjmp(_jmpbuf, 1);
+}
+
+void os::WatcherThreadCrashProtection::check_crash_protection(int sig,
+    Thread* thread) {
+
+  if (thread != NULL &&
+      thread->is_Watcher_thread() &&
+      WatcherThread::watcher_thread()->has_crash_protection()) {
+
+    if (sig == SIGSEGV || sig == SIGBUS) {
+      WatcherThread::watcher_thread()->crash_protection()->restore();
+    }
+  }
+}
--- a/hotspot/src/os/posix/vm/os_posix.hpp	Thu Jul 18 06:47:15 2013 -0400
+++ b/hotspot/src/os/posix/vm/os_posix.hpp	Wed Jul 17 13:48:15 2013 +0200
@@ -37,5 +37,24 @@
 
 };
 
+/*
+ * Crash protection for the watcher thread. Wrap the callback
+ * with a sigsetjmp and in case of a SIGSEGV/SIGBUS we siglongjmp
+ * back.
+ * To be able to use this - don't take locks, don't rely on destructors,
+ * don't make OS library calls, don't allocate memory, don't print,
+ * don't call code that could leave the heap / memory in an inconsistent state,
+ * or anything else where we are not in control if we suddenly jump out.
+ */
+class WatcherThreadCrashProtection : public StackObj {
+public:
+  WatcherThreadCrashProtection();
+  bool call(os::CrashProtectionCallback& cb);
+
+  static void check_crash_protection(int signal, Thread* thread);
+private:
+  void restore();
+  sigjmp_buf _jmpbuf;
+};
 
 #endif
--- a/hotspot/src/os/windows/vm/os_windows.cpp	Thu Jul 18 06:47:15 2013 -0400
+++ b/hotspot/src/os/windows/vm/os_windows.cpp	Wed Jul 17 13:48:15 2013 +0200
@@ -4684,6 +4684,34 @@
   }
 }
 
+os::WatcherThreadCrashProtection::WatcherThreadCrashProtection() {
+  assert(Thread::current()->is_Watcher_thread(), "Must be WatcherThread");
+}
+
+/*
+ * See the caveats for this class in os_windows.hpp
+ * Protects the callback call so that raised OS EXCEPTIONS causes a jump back
+ * into this method and returns false. If no OS EXCEPTION was raised, returns
+ * true.
+ * The callback is supposed to provide the method that should be protected.
+ */
+bool os::WatcherThreadCrashProtection::call(os::CrashProtectionCallback& cb) {
+  assert(Thread::current()->is_Watcher_thread(), "Only for WatcherThread");
+  assert(!WatcherThread::watcher_thread()->has_crash_protection(),
+      "crash_protection already set?");
+
+  bool success = true;
+  __try {
+    WatcherThread::watcher_thread()->set_crash_protection(this);
+    cb.call();
+  } __except(EXCEPTION_EXECUTE_HANDLER) {
+    // only for protection, nothing to do
+    success = false;
+  }
+  WatcherThread::watcher_thread()->set_crash_protection(NULL);
+  return success;
+}
+
 // An Event wraps a win32 "CreateEvent" kernel handle.
 //
 // We have a number of choices regarding "CreateEvent" win32 handle leakage:
--- a/hotspot/src/os/windows/vm/os_windows.hpp	Thu Jul 18 06:47:15 2013 -0400
+++ b/hotspot/src/os/windows/vm/os_windows.hpp	Wed Jul 17 13:48:15 2013 +0200
@@ -102,6 +102,20 @@
   static LONG WINAPI serialize_fault_filter(struct _EXCEPTION_POINTERS* e);
 };
 
+/*
+ * Crash protection for the watcher thread. Wrap the callback
+ * with a __try { call() }
+ * To be able to use this - don't take locks, don't rely on destructors,
+ * don't make OS library calls, don't allocate memory, don't print,
+ * don't call code that could leave the heap / memory in an inconsistent state,
+ * or anything else where we are not in control if we suddenly jump out.
+ */
+class WatcherThreadCrashProtection : public StackObj {
+public:
+  WatcherThreadCrashProtection();
+  bool call(os::CrashProtectionCallback& cb);
+};
+
 class PlatformEvent : public CHeapObj<mtInternal> {
   private:
     double CachePad [4] ;   // increase odds that _Event is sole occupant of cache line
--- a/hotspot/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp	Thu Jul 18 06:47:15 2013 -0400
+++ b/hotspot/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp	Wed Jul 17 13:48:15 2013 +0200
@@ -401,6 +401,10 @@
 
   Thread* t = ThreadLocalStorage::get_thread_slow();
 
+  // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away
+  // (no destructors can be run)
+  os::WatcherThreadCrashProtection::check_crash_protection(sig, t);
+
   SignalHandlerMark shm(t);
 
   // Note: it's not uncommon that JNI code uses signal/sigset to install
--- a/hotspot/src/os_cpu/linux_sparc/vm/os_linux_sparc.cpp	Thu Jul 18 06:47:15 2013 -0400
+++ b/hotspot/src/os_cpu/linux_sparc/vm/os_linux_sparc.cpp	Wed Jul 17 13:48:15 2013 +0200
@@ -553,6 +553,10 @@
 
   Thread* t = ThreadLocalStorage::get_thread_slow();
 
+  // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away
+  // (no destructors can be run)
+  os::WatcherThreadCrashProtection::check_crash_protection(sig, t);
+
   SignalHandlerMark shm(t);
 
   // Note: it's not uncommon that JNI code uses signal/sigset to install
--- a/hotspot/src/os_cpu/linux_x86/vm/os_linux_x86.cpp	Thu Jul 18 06:47:15 2013 -0400
+++ b/hotspot/src/os_cpu/linux_x86/vm/os_linux_x86.cpp	Wed Jul 17 13:48:15 2013 +0200
@@ -225,6 +225,10 @@
 
   Thread* t = ThreadLocalStorage::get_thread_slow();
 
+  // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away
+  // (no destructors can be run)
+  os::WatcherThreadCrashProtection::check_crash_protection(sig, t);
+
   SignalHandlerMark shm(t);
 
   // Note: it's not uncommon that JNI code uses signal/sigset to install
--- a/hotspot/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.cpp	Thu Jul 18 06:47:15 2013 -0400
+++ b/hotspot/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.cpp	Wed Jul 17 13:48:15 2013 +0200
@@ -315,6 +315,10 @@
 
   Thread* t = ThreadLocalStorage::get_thread_slow();
 
+  // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away
+  // (no destructors can be run)
+  os::WatcherThreadCrashProtection::check_crash_protection(sig, t);
+
   SignalHandlerMark shm(t);
 
   if(sig == SIGPIPE || sig == SIGXFSZ) {
--- a/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp	Thu Jul 18 06:47:15 2013 -0400
+++ b/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp	Wed Jul 17 13:48:15 2013 +0200
@@ -374,6 +374,10 @@
 
   Thread* t = ThreadLocalStorage::get_thread_slow();  // slow & steady
 
+  // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away
+  // (no destructors can be run)
+  os::WatcherThreadCrashProtection::check_crash_protection(sig, t);
+
   SignalHandlerMark shm(t);
 
   if(sig == SIGPIPE || sig == SIGXFSZ) {
--- a/hotspot/src/share/vm/runtime/mutex.cpp	Thu Jul 18 06:47:15 2013 -0400
+++ b/hotspot/src/share/vm/runtime/mutex.cpp	Wed Jul 17 13:48:15 2013 +0200
@@ -1370,6 +1370,10 @@
     debug_only(if (rank() != Mutex::special) \
       thread->check_for_valid_safepoint_state(false);)
   }
+  if (thread->is_Watcher_thread()) {
+    assert(!WatcherThread::watcher_thread()->has_crash_protection(),
+        "locking not allowed when crash protection is set");
+  }
 }
 
 void Monitor::check_block_state(Thread *thread) {
--- a/hotspot/src/share/vm/runtime/os.cpp	Thu Jul 18 06:47:15 2013 -0400
+++ b/hotspot/src/share/vm/runtime/os.cpp	Wed Jul 17 13:48:15 2013 +0200
@@ -595,6 +595,22 @@
   NOT_PRODUCT(inc_stat_counter(&num_mallocs, 1));
   NOT_PRODUCT(inc_stat_counter(&alloc_bytes, size));
 
+#ifdef ASSERT
+  // checking for the WatcherThread and crash_protection first
+  // since os::malloc can be called when the libjvm.{dll,so} is
+  // first loaded and we don't have a thread yet.
+  // try to find the thread after we see that the watcher thread
+  // exists and has crash protection.
+  WatcherThread *wt = WatcherThread::watcher_thread();
+  if (wt != NULL && wt->has_crash_protection()) {
+    Thread* thread = ThreadLocalStorage::get_thread_slow();
+    if (thread == wt) {
+      assert(!wt->has_crash_protection(),
+          "Can't malloc with crash protection from WatcherThread");
+    }
+  }
+#endif
+
   if (size == 0) {
     // return a valid pointer if size is zero
     // if NULL is returned the calling functions assume out of memory.
--- a/hotspot/src/share/vm/runtime/os.hpp	Thu Jul 18 06:47:15 2013 -0400
+++ b/hotspot/src/share/vm/runtime/os.hpp	Wed Jul 17 13:48:15 2013 +0200
@@ -32,15 +32,18 @@
 #include "utilities/top.hpp"
 #ifdef TARGET_OS_FAMILY_linux
 # include "jvm_linux.h"
+# include <setjmp.h>
 #endif
 #ifdef TARGET_OS_FAMILY_solaris
 # include "jvm_solaris.h"
+# include <setjmp.h>
 #endif
 #ifdef TARGET_OS_FAMILY_windows
 # include "jvm_windows.h"
 #endif
 #ifdef TARGET_OS_FAMILY_bsd
 # include "jvm_bsd.h"
+# include <setjmp.h>
 #endif
 
 // os defines the interface to operating system; this includes traditional
@@ -730,6 +733,10 @@
 #include "runtime/os_ext.hpp"
 
  public:
+  class CrashProtectionCallback : public StackObj {
+  public:
+    virtual void call() = 0;
+  };
 
   // Platform dependent stuff
 #ifdef TARGET_OS_FAMILY_linux
@@ -908,6 +915,7 @@
                                 char pathSep);
   static bool set_boot_path(char fileSep, char pathSep);
   static char** split_path(const char* path, int* n);
+
 };
 
 // Note that "PAUSE" is almost always used with synchronization
--- a/hotspot/src/share/vm/runtime/thread.cpp	Thu Jul 18 06:47:15 2013 -0400
+++ b/hotspot/src/share/vm/runtime/thread.cpp	Wed Jul 17 13:48:15 2013 +0200
@@ -1226,7 +1226,7 @@
 bool WatcherThread::_startable = false;
 volatile bool  WatcherThread::_should_terminate = false;
 
-WatcherThread::WatcherThread() : Thread() {
+WatcherThread::WatcherThread() : Thread(), _crash_protection(NULL) {
   assert(watcher_thread() == NULL, "we can only allocate one WatcherThread");
   if (os::create_thread(this, os::watcher_thread)) {
     _watcher_thread = this;
--- a/hotspot/src/share/vm/runtime/thread.hpp	Thu Jul 18 06:47:15 2013 -0400
+++ b/hotspot/src/share/vm/runtime/thread.hpp	Wed Jul 17 13:48:15 2013 +0200
@@ -733,6 +733,8 @@
 
   static bool _startable;
   volatile static bool _should_terminate; // updated without holding lock
+
+  os::WatcherThreadCrashProtection* _crash_protection;
  public:
   enum SomeConstants {
     delay_interval = 10                          // interrupt delay in milliseconds
@@ -760,6 +762,14 @@
   // Otherwise the first task to enroll will trigger the start
   static void make_startable();
 
+  void set_crash_protection(os::WatcherThreadCrashProtection* crash_protection) {
+    assert(Thread::current()->is_Watcher_thread(), "Can only be set by WatcherThread");
+    _crash_protection = crash_protection;
+  }
+
+  bool has_crash_protection() const { return _crash_protection != NULL; }
+  os::WatcherThreadCrashProtection* crash_protection() const { return _crash_protection; }
+
  private:
   int sleep() const;
 };