# HG changeset patch # User rbackman # Date 1374061695 -7200 # Node ID 7d0ef675e808f15ce7ed2a2104c220efbc49b9d6 # Parent 705506c1bf493b14344e981e2e3847065b52f103 8020701: Avoid crashes in WatcherThread Reviewed-by: acorn, dcubed, dsimms diff -r 705506c1bf49 -r 7d0ef675e808 hotspot/src/os/posix/vm/os_posix.cpp --- a/hotspot/src/os/posix/vm/os_posix.cpp Thu Jul 18 06:47:15 2013 -0400 +++ b/hotspot/src/os/posix/vm/os_posix.cpp Wed Jul 17 13:48:15 2013 +0200 @@ -259,3 +259,52 @@ FILE* os::open(int fd, const char* mode) { return ::fdopen(fd, mode); } + +os::WatcherThreadCrashProtection::WatcherThreadCrashProtection() { + assert(Thread::current()->is_Watcher_thread(), "Must be WatcherThread"); +} + +/* + * See the caveats for this class in os_posix.hpp + * Protects the callback call so that SIGSEGV / SIGBUS jumps back into this + * method and returns false. If none of the signals are raised, returns true. + * The callback is supposed to provide the method that should be protected. + */ +bool os::WatcherThreadCrashProtection::call(os::CrashProtectionCallback& cb) { + assert(Thread::current()->is_Watcher_thread(), "Only for WatcherThread"); + assert(!WatcherThread::watcher_thread()->has_crash_protection(), + "crash_protection already set?"); + + if (sigsetjmp(_jmpbuf, 1) == 0) { + // make sure we can see in the signal handler that we have crash protection + // installed + WatcherThread::watcher_thread()->set_crash_protection(this); + cb.call(); + // and clear the crash protection + WatcherThread::watcher_thread()->set_crash_protection(NULL); + return true; + } + // this happens when we siglongjmp() back + WatcherThread::watcher_thread()->set_crash_protection(NULL); + return false; +} + +void os::WatcherThreadCrashProtection::restore() { + assert(WatcherThread::watcher_thread()->has_crash_protection(), + "must have crash protection"); + + siglongjmp(_jmpbuf, 1); +} + +void os::WatcherThreadCrashProtection::check_crash_protection(int sig, + Thread* thread) { + + if (thread 
!= NULL && + thread->is_Watcher_thread() && + WatcherThread::watcher_thread()->has_crash_protection()) { + + if (sig == SIGSEGV || sig == SIGBUS) { + WatcherThread::watcher_thread()->crash_protection()->restore(); + } + } +} diff -r 705506c1bf49 -r 7d0ef675e808 hotspot/src/os/posix/vm/os_posix.hpp --- a/hotspot/src/os/posix/vm/os_posix.hpp Thu Jul 18 06:47:15 2013 -0400 +++ b/hotspot/src/os/posix/vm/os_posix.hpp Wed Jul 17 13:48:15 2013 +0200 @@ -37,5 +37,24 @@ }; +/* + * Crash protection for the watcher thread. Wrap the callback + * with a sigsetjmp and in case of a SIGSEGV/SIGBUS we siglongjmp + * back. + * To be able to use this - don't take locks, don't rely on destructors, + * don't make OS library calls, don't allocate memory, don't print, + * don't call code that could leave the heap / memory in an inconsistent state, + * or anything else where we are not in control if we suddenly jump out. + */ +class WatcherThreadCrashProtection : public StackObj { +public: + WatcherThreadCrashProtection(); + bool call(os::CrashProtectionCallback& cb); + + static void check_crash_protection(int signal, Thread* thread); +private: + void restore(); + sigjmp_buf _jmpbuf; +}; #endif diff -r 705506c1bf49 -r 7d0ef675e808 hotspot/src/os/windows/vm/os_windows.cpp --- a/hotspot/src/os/windows/vm/os_windows.cpp Thu Jul 18 06:47:15 2013 -0400 +++ b/hotspot/src/os/windows/vm/os_windows.cpp Wed Jul 17 13:48:15 2013 +0200 @@ -4684,6 +4684,34 @@ } } +os::WatcherThreadCrashProtection::WatcherThreadCrashProtection() { + assert(Thread::current()->is_Watcher_thread(), "Must be WatcherThread"); +} + +/* + * See the caveats for this class in os_windows.hpp + * Protects the callback call so that raised OS EXCEPTIONS causes a jump back + * into this method and returns false. If no OS EXCEPTION was raised, returns + * true. + * The callback is supposed to provide the method that should be protected. 
+ */ +bool os::WatcherThreadCrashProtection::call(os::CrashProtectionCallback& cb) { + assert(Thread::current()->is_Watcher_thread(), "Only for WatcherThread"); + assert(!WatcherThread::watcher_thread()->has_crash_protection(), + "crash_protection already set?"); + + bool success = true; + __try { + WatcherThread::watcher_thread()->set_crash_protection(this); + cb.call(); + } __except(EXCEPTION_EXECUTE_HANDLER) { + // only for protection, nothing to do + success = false; + } + WatcherThread::watcher_thread()->set_crash_protection(NULL); + return success; +} + // An Event wraps a win32 "CreateEvent" kernel handle. // // We have a number of choices regarding "CreateEvent" win32 handle leakage: diff -r 705506c1bf49 -r 7d0ef675e808 hotspot/src/os/windows/vm/os_windows.hpp --- a/hotspot/src/os/windows/vm/os_windows.hpp Thu Jul 18 06:47:15 2013 -0400 +++ b/hotspot/src/os/windows/vm/os_windows.hpp Wed Jul 17 13:48:15 2013 +0200 @@ -102,6 +102,20 @@ static LONG WINAPI serialize_fault_filter(struct _EXCEPTION_POINTERS* e); }; +/* + * Crash protection for the watcher thread. Wrap the callback + * with a __try { call() } + * To be able to use this - don't take locks, don't rely on destructors, + * don't make OS library calls, don't allocate memory, don't print, + * don't call code that could leave the heap / memory in an inconsistent state, + * or anything else where we are not in control if we suddenly jump out. 
+ */ +class WatcherThreadCrashProtection : public StackObj { +public: + WatcherThreadCrashProtection(); + bool call(os::CrashProtectionCallback& cb); +}; + class PlatformEvent : public CHeapObj { private: double CachePad [4] ; // increase odds that _Event is sole occupant of cache line diff -r 705506c1bf49 -r 7d0ef675e808 hotspot/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp --- a/hotspot/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp Thu Jul 18 06:47:15 2013 -0400 +++ b/hotspot/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp Wed Jul 17 13:48:15 2013 +0200 @@ -401,6 +401,10 @@ Thread* t = ThreadLocalStorage::get_thread_slow(); + // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away + // (no destructors can be run) + os::WatcherThreadCrashProtection::check_crash_protection(sig, t); + SignalHandlerMark shm(t); // Note: it's not uncommon that JNI code uses signal/sigset to install diff -r 705506c1bf49 -r 7d0ef675e808 hotspot/src/os_cpu/linux_sparc/vm/os_linux_sparc.cpp --- a/hotspot/src/os_cpu/linux_sparc/vm/os_linux_sparc.cpp Thu Jul 18 06:47:15 2013 -0400 +++ b/hotspot/src/os_cpu/linux_sparc/vm/os_linux_sparc.cpp Wed Jul 17 13:48:15 2013 +0200 @@ -553,6 +553,10 @@ Thread* t = ThreadLocalStorage::get_thread_slow(); + // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away + // (no destructors can be run) + os::WatcherThreadCrashProtection::check_crash_protection(sig, t); + SignalHandlerMark shm(t); // Note: it's not uncommon that JNI code uses signal/sigset to install diff -r 705506c1bf49 -r 7d0ef675e808 hotspot/src/os_cpu/linux_x86/vm/os_linux_x86.cpp --- a/hotspot/src/os_cpu/linux_x86/vm/os_linux_x86.cpp Thu Jul 18 06:47:15 2013 -0400 +++ b/hotspot/src/os_cpu/linux_x86/vm/os_linux_x86.cpp Wed Jul 17 13:48:15 2013 +0200 @@ -225,6 +225,10 @@ Thread* t = ThreadLocalStorage::get_thread_slow(); + // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away + // (no destructors can be run) + 
os::WatcherThreadCrashProtection::check_crash_protection(sig, t); + SignalHandlerMark shm(t); // Note: it's not uncommon that JNI code uses signal/sigset to install diff -r 705506c1bf49 -r 7d0ef675e808 hotspot/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.cpp --- a/hotspot/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.cpp Thu Jul 18 06:47:15 2013 -0400 +++ b/hotspot/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.cpp Wed Jul 17 13:48:15 2013 +0200 @@ -315,6 +315,10 @@ Thread* t = ThreadLocalStorage::get_thread_slow(); + // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away + // (no destructors can be run) + os::WatcherThreadCrashProtection::check_crash_protection(sig, t); + SignalHandlerMark shm(t); if(sig == SIGPIPE || sig == SIGXFSZ) { diff -r 705506c1bf49 -r 7d0ef675e808 hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp --- a/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp Thu Jul 18 06:47:15 2013 -0400 +++ b/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp Wed Jul 17 13:48:15 2013 +0200 @@ -374,6 +374,10 @@ Thread* t = ThreadLocalStorage::get_thread_slow(); // slow & steady + // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away + // (no destructors can be run) + os::WatcherThreadCrashProtection::check_crash_protection(sig, t); + SignalHandlerMark shm(t); if(sig == SIGPIPE || sig == SIGXFSZ) { diff -r 705506c1bf49 -r 7d0ef675e808 hotspot/src/share/vm/runtime/mutex.cpp --- a/hotspot/src/share/vm/runtime/mutex.cpp Thu Jul 18 06:47:15 2013 -0400 +++ b/hotspot/src/share/vm/runtime/mutex.cpp Wed Jul 17 13:48:15 2013 +0200 @@ -1370,6 +1370,10 @@ debug_only(if (rank() != Mutex::special) \ thread->check_for_valid_safepoint_state(false);) } + if (thread->is_Watcher_thread()) { + assert(!WatcherThread::watcher_thread()->has_crash_protection(), + "locking not allowed when crash protection is set"); + } } void Monitor::check_block_state(Thread *thread) { diff -r 705506c1bf49 -r 
7d0ef675e808 hotspot/src/share/vm/runtime/os.cpp --- a/hotspot/src/share/vm/runtime/os.cpp Thu Jul 18 06:47:15 2013 -0400 +++ b/hotspot/src/share/vm/runtime/os.cpp Wed Jul 17 13:48:15 2013 +0200 @@ -595,6 +595,22 @@ NOT_PRODUCT(inc_stat_counter(&num_mallocs, 1)); NOT_PRODUCT(inc_stat_counter(&alloc_bytes, size)); +#ifdef ASSERT + // checking for the WatcherThread and crash_protection first + // since os::malloc can be called when the libjvm.{dll,so} is + // first loaded and we don't have a thread yet. + // try to find the thread after we see that the watcher thread + // exists and has crash protection. + WatcherThread *wt = WatcherThread::watcher_thread(); + if (wt != NULL && wt->has_crash_protection()) { + Thread* thread = ThreadLocalStorage::get_thread_slow(); + if (thread == wt) { + assert(!wt->has_crash_protection(), + "Can't malloc with crash protection from WatcherThread"); + } + } +#endif + if (size == 0) { // return a valid pointer if size is zero // if NULL is returned the calling functions assume out of memory. 
diff -r 705506c1bf49 -r 7d0ef675e808 hotspot/src/share/vm/runtime/os.hpp --- a/hotspot/src/share/vm/runtime/os.hpp Thu Jul 18 06:47:15 2013 -0400 +++ b/hotspot/src/share/vm/runtime/os.hpp Wed Jul 17 13:48:15 2013 +0200 @@ -32,15 +32,18 @@ #include "utilities/top.hpp" #ifdef TARGET_OS_FAMILY_linux # include "jvm_linux.h" +# include <setjmp.h> #endif #ifdef TARGET_OS_FAMILY_solaris # include "jvm_solaris.h" +# include <setjmp.h> #endif #ifdef TARGET_OS_FAMILY_windows # include "jvm_windows.h" #endif #ifdef TARGET_OS_FAMILY_bsd # include "jvm_bsd.h" +# include <setjmp.h> #endif // os defines the interface to operating system; this includes traditional @@ -730,6 +733,10 @@ #include "runtime/os_ext.hpp" public: + class CrashProtectionCallback : public StackObj { + public: + virtual void call() = 0; + }; // Platform dependent stuff #ifdef TARGET_OS_FAMILY_linux @@ -908,6 +915,7 @@ char pathSep); static bool set_boot_path(char fileSep, char pathSep); static char** split_path(const char* path, int* n); + }; // Note that "PAUSE" is almost always used with synchronization diff -r 705506c1bf49 -r 7d0ef675e808 hotspot/src/share/vm/runtime/thread.cpp --- a/hotspot/src/share/vm/runtime/thread.cpp Thu Jul 18 06:47:15 2013 -0400 +++ b/hotspot/src/share/vm/runtime/thread.cpp Wed Jul 17 13:48:15 2013 +0200 @@ -1226,7 +1226,7 @@ bool WatcherThread::_startable = false; volatile bool WatcherThread::_should_terminate = false; -WatcherThread::WatcherThread() : Thread() { +WatcherThread::WatcherThread() : Thread(), _crash_protection(NULL) { assert(watcher_thread() == NULL, "we can only allocate one WatcherThread"); if (os::create_thread(this, os::watcher_thread)) { _watcher_thread = this; diff -r 705506c1bf49 -r 7d0ef675e808 hotspot/src/share/vm/runtime/thread.hpp --- a/hotspot/src/share/vm/runtime/thread.hpp Thu Jul 18 06:47:15 2013 -0400 +++ b/hotspot/src/share/vm/runtime/thread.hpp Wed Jul 17 13:48:15 2013 +0200 @@ -733,6 +733,8 @@ static bool _startable; volatile static bool _should_terminate; // updated without 
holding lock + + os::WatcherThreadCrashProtection* _crash_protection; public: enum SomeConstants { delay_interval = 10 // interrupt delay in milliseconds @@ -760,6 +762,14 @@ // Otherwise the first task to enroll will trigger the start static void make_startable(); + void set_crash_protection(os::WatcherThreadCrashProtection* crash_protection) { + assert(Thread::current()->is_Watcher_thread(), "Can only be set by WatcherThread"); + _crash_protection = crash_protection; + } + + bool has_crash_protection() const { return _crash_protection != NULL; } + os::WatcherThreadCrashProtection* crash_protection() const { return _crash_protection; } + private: int sleep() const; };