8022335: Native stack walk while generating hs_err does not work on Windows x64
author iklam
Fri, 06 Sep 2013 08:42:42 -0700
changeset 19952 bc974e92f881
parent 19951 cece69b005c0
child 19953 bcb70cd411bc
8022335: Native stack walk while generating hs_err does not work on Windows x64
Summary: Use WinDbg API StackWalk64()
Reviewed-by: zgu, dholmes
hotspot/src/os/windows/vm/decoder_windows.cpp
hotspot/src/os/windows/vm/decoder_windows.hpp
hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.cpp
hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.hpp
hotspot/src/share/vm/runtime/frame.cpp
hotspot/src/share/vm/runtime/frame.hpp
hotspot/src/share/vm/runtime/os.hpp
hotspot/src/share/vm/utilities/decoder.cpp
hotspot/src/share/vm/utilities/decoder.hpp
hotspot/src/share/vm/utilities/vmError.cpp
hotspot/src/share/vm/utilities/vmError.hpp
--- a/hotspot/src/os/windows/vm/decoder_windows.cpp	Wed Sep 04 08:55:08 2013 -0400
+++ b/hotspot/src/os/windows/vm/decoder_windows.cpp	Fri Sep 06 08:42:42 2013 -0700
@@ -32,7 +32,11 @@
   _can_decode_in_vm = false;
   _pfnSymGetSymFromAddr64 = NULL;
   _pfnUndecorateSymbolName = NULL;
-
+#ifdef AMD64
+  _pfnStackWalk64 = NULL;
+  _pfnSymFunctionTableAccess64 = NULL;
+  _pfnSymGetModuleBase64 = NULL;
+#endif
   _decoder_status = no_error;
   initialize();
 }
@@ -53,14 +57,24 @@
     _pfnUndecorateSymbolName = (pfn_UndecorateSymbolName)::GetProcAddress(handle, "UnDecorateSymbolName");
 
     if (_pfnSymSetOptions == NULL || _pfnSymInitialize == NULL || _pfnSymGetSymFromAddr64 == NULL) {
-      _pfnSymGetSymFromAddr64 = NULL;
-      _pfnUndecorateSymbolName = NULL;
-      ::FreeLibrary(handle);
-      _dbghelp_handle = NULL;
+      uninitialize();
       _decoder_status = helper_func_error;
       return;
     }
 
+#ifdef AMD64
+    _pfnStackWalk64 = (pfn_StackWalk64)::GetProcAddress(handle, "StackWalk64");
+    _pfnSymFunctionTableAccess64 = (pfn_SymFunctionTableAccess64)::GetProcAddress(handle, "SymFunctionTableAccess64");
+    _pfnSymGetModuleBase64 = (pfn_SymGetModuleBase64)::GetProcAddress(handle, "SymGetModuleBase64");
+    if (_pfnStackWalk64 == NULL || _pfnSymFunctionTableAccess64 == NULL || _pfnSymGetModuleBase64 == NULL) {
+      // We can't call StackWalk64 to walk the stack, but we are still
+      // able to decode the symbols. Let's limp on.
+      _pfnStackWalk64 = NULL;
+      _pfnSymFunctionTableAccess64 = NULL;
+      _pfnSymGetModuleBase64 = NULL;
+    }
+#endif
+
     HANDLE hProcess = ::GetCurrentProcess();
     _pfnSymSetOptions(SYMOPT_UNDNAME | SYMOPT_DEFERRED_LOADS | SYMOPT_EXACT_SYMBOLS);
     if (!_pfnSymInitialize(hProcess, NULL, TRUE)) {
@@ -156,6 +170,11 @@
 void WindowsDecoder::uninitialize() {
   _pfnSymGetSymFromAddr64 = NULL;
   _pfnUndecorateSymbolName = NULL;
+#ifdef AMD64
+  _pfnStackWalk64 = NULL;
+  _pfnSymFunctionTableAccess64 = NULL;
+  _pfnSymGetModuleBase64 = NULL;
+#endif
   if (_dbghelp_handle != NULL) {
     ::FreeLibrary(_dbghelp_handle);
   }
@@ -195,3 +214,65 @@
          _pfnUndecorateSymbolName(symbol, buf, buflen, UNDNAME_COMPLETE);
 }
 
+#ifdef AMD64
+// Forwards to dbghelp's StackWalk64 through the entry point cached in WindowsDecoder.
+BOOL WindowsDbgHelp::StackWalk64(DWORD MachineType,
+                                 HANDLE hProcess,
+                                 HANDLE hThread,
+                                 LPSTACKFRAME64 StackFrame,
+                                 PVOID ContextRecord,
+                                 PREAD_PROCESS_MEMORY_ROUTINE64 ReadMemoryRoutine,
+                                 PFUNCTION_TABLE_ACCESS_ROUTINE64 FunctionTableAccessRoutine,
+                                 PGET_MODULE_BASE_ROUTINE64 GetModuleBaseRoutine,
+                                 PTRANSLATE_ADDRESS_ROUTINE64 TranslateAddress) {
+  DecoderLocker guard;
+  WindowsDecoder* win_decoder = (WindowsDecoder*)guard.decoder();
+  // Report failure when the decoder has an error or StackWalk64 was never resolved.
+  if (win_decoder->has_error() || win_decoder->_pfnStackWalk64 == NULL) {
+    return false;
+  }
+  return win_decoder->_pfnStackWalk64(MachineType,
+                                      hProcess,
+                                      hThread,
+                                      StackFrame,
+                                      ContextRecord,
+                                      ReadMemoryRoutine,
+                                      FunctionTableAccessRoutine,
+                                      GetModuleBaseRoutine,
+                                      TranslateAddress);
+}
+
+// Calls dbghelp's SymFunctionTableAccess64 via WindowsDecoder; NULL when it cannot be called.
+PVOID WindowsDbgHelp::SymFunctionTableAccess64(HANDLE hProcess, DWORD64 AddrBase) {
+  DecoderLocker guard;
+  WindowsDecoder* win_decoder = (WindowsDecoder*)guard.decoder();
+  // Bail out before touching a decoder in error or an unresolved function pointer.
+  if (win_decoder->has_error() || win_decoder->_pfnSymFunctionTableAccess64 == NULL) {
+    return NULL;
+  }
+  return win_decoder->_pfnSymFunctionTableAccess64(hProcess, AddrBase);
+}
+
+pfn_SymFunctionTableAccess64 WindowsDbgHelp::pfnSymFunctionTableAccess64() {
+  DecoderLocker locker;
+  WindowsDecoder* wd = (WindowsDecoder*)locker.decoder();
+
+  if (!wd->has_error()) {
+    return wd->_pfnSymFunctionTableAccess64;
+  } else {
+    return NULL;
+  }
+}
+
+pfn_SymGetModuleBase64 WindowsDbgHelp::pfnSymGetModuleBase64() {
+  DecoderLocker locker;
+  WindowsDecoder* wd = (WindowsDecoder*)locker.decoder();
+
+  if (!wd->has_error()) {
+    return wd->_pfnSymGetModuleBase64;
+  } else {
+    return NULL;
+  }
+}
+
+#endif // AMD64
--- a/hotspot/src/os/windows/vm/decoder_windows.hpp	Wed Sep 04 08:55:08 2013 -0400
+++ b/hotspot/src/os/windows/vm/decoder_windows.hpp	Fri Sep 06 08:42:42 2013 -0700
@@ -38,6 +38,20 @@
 typedef BOOL  (WINAPI *pfn_SymSetSearchPath)(HANDLE, PCTSTR);
 typedef BOOL  (WINAPI *pfn_SymGetSearchPath)(HANDLE, PTSTR, int);
 
+#ifdef AMD64
+typedef BOOL  (WINAPI *pfn_StackWalk64)(DWORD MachineType,
+                                        HANDLE hProcess,
+                                        HANDLE hThread,
+                                        LPSTACKFRAME64 StackFrame,
+                                        PVOID ContextRecord,
+                                        PREAD_PROCESS_MEMORY_ROUTINE64 ReadMemoryRoutine,
+                                        PFUNCTION_TABLE_ACCESS_ROUTINE64 FunctionTableAccessRoutine,
+                                        PGET_MODULE_BASE_ROUTINE64 GetModuleBaseRoutine,
+                                        PTRANSLATE_ADDRESS_ROUTINE64 TranslateAddress);
+typedef PVOID (WINAPI *pfn_SymFunctionTableAccess64)(HANDLE hProcess, DWORD64 AddrBase);
+typedef DWORD64 (WINAPI *pfn_SymGetModuleBase64)(HANDLE hProcess, DWORD64 dwAddr);
+#endif
+
 class WindowsDecoder : public AbstractDecoder {
 
 public:
@@ -61,7 +75,34 @@
   bool                      _can_decode_in_vm;
   pfn_SymGetSymFromAddr64   _pfnSymGetSymFromAddr64;
   pfn_UndecorateSymbolName  _pfnUndecorateSymbolName;
+#ifdef AMD64
+  pfn_StackWalk64              _pfnStackWalk64;
+  pfn_SymFunctionTableAccess64 _pfnSymFunctionTableAccess64;
+  pfn_SymGetModuleBase64       _pfnSymGetModuleBase64;
+
+  friend class WindowsDbgHelp;
+#endif
 };
 
+#ifdef AMD64
+// TODO: refactor and move the handling of dbghelp.dll outside of Decoder
+class WindowsDbgHelp : public Decoder {  // static wrappers around dbghelp.dll entry points cached in WindowsDecoder
+public:
+  static BOOL StackWalk64(DWORD MachineType,
+                          HANDLE hProcess,
+                          HANDLE hThread,
+                          LPSTACKFRAME64 StackFrame,
+                          PVOID ContextRecord,
+                          PREAD_PROCESS_MEMORY_ROUTINE64 ReadMemoryRoutine,
+                          PFUNCTION_TABLE_ACCESS_ROUTINE64 FunctionTableAccessRoutine,
+                          PGET_MODULE_BASE_ROUTINE64 GetModuleBaseRoutine,
+                          PTRANSLATE_ADDRESS_ROUTINE64 TranslateAddress);
+  static PVOID SymFunctionTableAccess64(HANDLE hProcess, DWORD64 AddrBase);
+  // The accessors below return the cached function pointers themselves (NULL when unavailable).
+  static pfn_SymFunctionTableAccess64 pfnSymFunctionTableAccess64();
+  static pfn_SymGetModuleBase64       pfnSymGetModuleBase64();
+};
+#endif
+
 #endif // OS_WINDOWS_VM_DECODER_WINDOWS_HPP
 
--- a/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.cpp	Wed Sep 04 08:55:08 2013 -0400
+++ b/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.cpp	Fri Sep 06 08:42:42 2013 -0700
@@ -29,6 +29,7 @@
 #include "classfile/vmSymbols.hpp"
 #include "code/icBuffer.hpp"
 #include "code/vtableStubs.hpp"
+#include "decoder_windows.hpp"
 #include "interpreter/interpreter.hpp"
 #include "jvm_windows.h"
 #include "memory/allocation.inline.hpp"
@@ -327,6 +328,94 @@
 
 cmpxchg_long_func_t* os::atomic_cmpxchg_long_func = os::atomic_cmpxchg_long_bootstrap;
 
+#ifdef AMD64
+/*
+ * Prints the native stack to st by walking it with dbghelp's StackWalk64().
+ * context: CONTEXT to start from, or NULL to capture the current one via RtlCaptureContext().
+ * buf/buf_size: scratch buffer handed to frame::print_C_frame(). Always returns true.
+ * Windows/x64 does not use stack frames the way expected by Java:
+ * [1] in most cases, there is no frame pointer. All locals are addressed via RSP
+ * [2] in rare cases, when alloca() is used, a frame pointer is used, but this may not be RBP.
+ * See http://msdn.microsoft.com/en-us/library/ew5tede7.aspx
+ * So it's not possible to print the native stack using the
+ * "while (...) {... fr = os::get_sender_for_C_frame(&fr); }" loop in vmError.cpp. We need to roll our own loop.
+ */
+bool os::platform_print_native_stack(outputStream* st, void* context,
+                                     char *buf, int buf_size)
+{
+  CONTEXT ctx;
+  if (context != NULL) {
+    memcpy(&ctx, context, sizeof(ctx));
+  } else {
+    RtlCaptureContext(&ctx);
+  }
+
+  st->print_cr("Native frames: (J=compiled Java code, j=interpreted, Vv=VM code, C=native code)");
+
+  STACKFRAME stk;
+  memset(&stk, 0, sizeof(stk));
+  stk.AddrStack.Offset    = ctx.Rsp;
+  stk.AddrStack.Mode      = AddrModeFlat;
+  stk.AddrFrame.Offset    = ctx.Rbp;
+  stk.AddrFrame.Mode      = AddrModeFlat;
+  stk.AddrPC.Offset       = ctx.Rip;
+  stk.AddrPC.Mode         = AddrModeFlat;
+
+  int count = 0;
+  address lastpc = 0;
+  while (count++ < StackPrintLimit) {
+    intptr_t* sp = (intptr_t*)stk.AddrStack.Offset;
+    intptr_t* fp = (intptr_t*)stk.AddrFrame.Offset; // NOT necessarily the same as ctx.Rbp!
+    address pc = (address)stk.AddrPC.Offset;
+
+    if (pc != NULL && sp != NULL && fp != NULL) {
+      if (count == 2 && lastpc == pc) {
+        // Skip it -- StackWalk64() may return the same PC
+        // (but different SP) on the first try.
+      } else {
+        // Don't try to create a frame(sp, fp, pc) -- on WinX64, stk.AddrFrame
+        // may not contain what Java expects, and may cause the frame() constructor
+        // to crash. Let's just print out the symbolic address.
+        frame::print_C_frame(st, buf, buf_size, pc);
+        st->cr();
+      }
+      lastpc = pc;
+    } else {
+      break;
+    }
+
+    PVOID p = WindowsDbgHelp::SymFunctionTableAccess64(GetCurrentProcess(), stk.AddrPC.Offset);
+    if (!p) {
+      // StackWalk64() can't handle this PC. Calling StackWalk64 again may cause crash.
+      break;
+    }
+
+    BOOL result = WindowsDbgHelp::StackWalk64(
+        IMAGE_FILE_MACHINE_AMD64,  // __in      DWORD MachineType,
+        GetCurrentProcess(),       // __in      HANDLE hProcess,
+        GetCurrentThread(),        // __in      HANDLE hThread,
+        &stk,                      // __inout   LPSTACKFRAME64 StackFrame,
+        &ctx,                      // __inout   PVOID ContextRecord,
+        NULL,                      // __in_opt  PREAD_PROCESS_MEMORY_ROUTINE64 ReadMemoryRoutine,
+        WindowsDbgHelp::pfnSymFunctionTableAccess64(),
+                                   // __in_opt  PFUNCTION_TABLE_ACCESS_ROUTINE64 FunctionTableAccessRoutine,
+        WindowsDbgHelp::pfnSymGetModuleBase64(),
+                                   // __in_opt  PGET_MODULE_BASE_ROUTINE64 GetModuleBaseRoutine,
+        NULL);                     // __in_opt  PTRANSLATE_ADDRESS_ROUTINE64 TranslateAddress
+
+    if (!result) {
+      break;
+    }
+  }
+  if (count > StackPrintLimit) {
+    st->print_cr("...<more frames>...");
+  }
+  st->cr();
+
+  return true;
+}
+#endif // AMD64
+
 ExtendedPC os::fetch_frame_from_context(void* ucVoid,
                     intptr_t** ret_sp, intptr_t** ret_fp) {
 
@@ -401,6 +490,9 @@
                                      StubRoutines::x86::get_previous_fp_entry());
   if (func == NULL) return frame();
   intptr_t* fp = (*func)();
+  if (fp == NULL) {
+    return frame();
+  }
 #else
   intptr_t* fp = _get_previous_fp();
 #endif // AMD64
--- a/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.hpp	Wed Sep 04 08:55:08 2013 -0400
+++ b/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.hpp	Fri Sep 06 08:42:42 2013 -0700
@@ -62,4 +62,10 @@
 
   static bool      register_code_area(char *low, char *high);
 
+#ifdef AMD64
+#define PLATFORM_PRINT_NATIVE_STACK 1
+static bool platform_print_native_stack(outputStream* st, void* context,
+                                        char *buf, int buf_size);
+#endif
+
 #endif // OS_CPU_WINDOWS_X86_VM_OS_WINDOWS_X86_HPP
--- a/hotspot/src/share/vm/runtime/frame.cpp	Wed Sep 04 08:55:08 2013 -0400
+++ b/hotspot/src/share/vm/runtime/frame.cpp	Fri Sep 06 08:42:42 2013 -0700
@@ -652,7 +652,7 @@
 // Return whether the frame is in the VM or os indicating a Hotspot problem.
 // Otherwise, it's likely a bug in the native library that the Java code calls,
 // hopefully indicating where to submit bugs.
-static void print_C_frame(outputStream* st, char* buf, int buflen, address pc) {
+void frame::print_C_frame(outputStream* st, char* buf, int buflen, address pc) {
   // C/C++ frame
   bool in_vm = os::address_is_in_vm(pc);
   st->print(in_vm ? "V" : "C");
--- a/hotspot/src/share/vm/runtime/frame.hpp	Wed Sep 04 08:55:08 2013 -0400
+++ b/hotspot/src/share/vm/runtime/frame.hpp	Fri Sep 06 08:42:42 2013 -0700
@@ -406,6 +406,7 @@
   void print_on(outputStream* st) const;
   void interpreter_frame_print_on(outputStream* st) const;
   void print_on_error(outputStream* st, char* buf, int buflen, bool verbose = false) const;
+  static void print_C_frame(outputStream* st, char* buf, int buflen, address pc);
 
   // Add annotated descriptions of memory locations belonging to this frame to values
   void describe(FrameValues& values, int frame_no);
--- a/hotspot/src/share/vm/runtime/os.hpp	Wed Sep 04 08:55:08 2013 -0400
+++ b/hotspot/src/share/vm/runtime/os.hpp	Fri Sep 06 08:42:42 2013 -0700
@@ -795,6 +795,14 @@
 #endif
 
  public:
+#ifndef PLATFORM_PRINT_NATIVE_STACK
+  // No platform-specific code for printing the native stack: prints nothing and returns false.
+  static bool platform_print_native_stack(outputStream* st, void* context,
+                                          char *buf, int buf_size) {
+    return false;  // the caller in vmError.cpp then uses its generic frame walk
+  }
+#endif
+
   // debugging support (mostly used by debug.cpp but also fatal error handler)
   static bool find(address pc, outputStream* st = tty); // OS specific function to make sense out of an address
 
--- a/hotspot/src/share/vm/utilities/decoder.cpp	Wed Sep 04 08:55:08 2013 -0400
+++ b/hotspot/src/share/vm/utilities/decoder.cpp	Fri Sep 06 08:42:42 2013 -0700
@@ -24,7 +24,6 @@
 
 #include "precompiled.hpp"
 #include "prims/jvm.h"
-#include "runtime/mutexLocker.hpp"
 #include "runtime/os.hpp"
 #include "utilities/decoder.hpp"
 #include "utilities/vmError.hpp"
@@ -80,6 +79,23 @@
   return decoder;
 }
 
+inline bool DecoderLocker::is_first_error_thread() {  // true when the calling thread's id equals VMError's first_error_tid
+  return (os::current_thread_id() == VMError::get_first_error_tid());
+}
+
+DecoderLocker::DecoderLocker() :
+  MutexLockerEx(DecoderLocker::is_first_error_thread() ?  // first-error thread passes NULL instead of the shared lock
+                NULL : Decoder::shared_decoder_lock(), true) {  // NOTE(review): presumably NULL means "do not lock" -- confirm in MutexLockerEx
+  _decoder = is_first_error_thread() ?  // that thread gets the error-handler decoder, every other thread the shared one
+    Decoder::get_error_handler_instance() : Decoder::get_shared_instance();
+  assert(_decoder != NULL, "null decoder");
+}
+
+Mutex* Decoder::shared_decoder_lock() {  // accessor for _shared_decoder_lock; asserts that it is non-NULL
+  assert(_shared_decoder_lock != NULL, "Just check");
+  return _shared_decoder_lock;
+}
+
 bool Decoder::decode(address addr, char* buf, int buflen, int* offset, const char* modulepath) {
   assert(_shared_decoder_lock != NULL, "Just check");
   bool error_handling_thread = os::current_thread_id() == VMError::first_error_tid;
--- a/hotspot/src/share/vm/utilities/decoder.hpp	Wed Sep 04 08:55:08 2013 -0400
+++ b/hotspot/src/share/vm/utilities/decoder.hpp	Fri Sep 06 08:42:42 2013 -0700
@@ -28,6 +28,7 @@
 
 #include "memory/allocation.hpp"
 #include "runtime/mutex.hpp"
+#include "runtime/mutexLocker.hpp"
 
 class AbstractDecoder : public CHeapObj<mtInternal> {
 public:
@@ -124,6 +125,19 @@
 
 protected:
   static Mutex*               _shared_decoder_lock;
+  static Mutex* shared_decoder_lock();
+
+  friend class DecoderLocker;
+};
+
+// MutexLockerEx subclass whose constructor also picks the decoder instance for the calling thread.
+class DecoderLocker : public MutexLockerEx {
+  AbstractDecoder* _decoder;
+  // Static: the constructor calls this in its base-class initializer, before the base is constructed.
+  static inline bool is_first_error_thread();
+public:
+  DecoderLocker();
+  AbstractDecoder* decoder() { return _decoder; }  // decoder selected by the constructor
 };
 
 #endif // SHARE_VM_UTILITIES_DECODER_HPP
--- a/hotspot/src/share/vm/utilities/vmError.cpp	Wed Sep 04 08:55:08 2013 -0400
+++ b/hotspot/src/share/vm/utilities/vmError.cpp	Fri Sep 06 08:42:42 2013 -0700
@@ -574,6 +574,10 @@
   STEP(120, "(printing native stack)" )
 
      if (_verbose) {
+     if (os::platform_print_native_stack(st, _context, buf, sizeof(buf))) {
+       // We have printed the native stack in platform-specific code
+       // Windows/x64 needs special handling.
+     } else {
        frame fr = _context ? os::fetch_frame_from_context(_context)
                            : os::current_frame();
 
@@ -604,6 +608,7 @@
           st->cr();
        }
      }
+   }
 
   STEP(130, "(printing Java stack)" )
 
--- a/hotspot/src/share/vm/utilities/vmError.hpp	Wed Sep 04 08:55:08 2013 -0400
+++ b/hotspot/src/share/vm/utilities/vmError.hpp	Fri Sep 06 08:42:42 2013 -0700
@@ -136,6 +136,10 @@
 
   // check to see if fatal error reporting is in progress
   static bool fatal_error_in_progress() { return first_error != NULL; }
+
+  static jlong get_first_error_tid() {  // read accessor for first_error_tid
+    return first_error_tid;
+  }
 };
 
 #endif // SHARE_VM_UTILITIES_VMERROR_HPP