7014918: Improve core/minidump handling in Hotspot
authorctornqvi
Wed, 09 Feb 2011 11:08:10 +0100
changeset 8119 81eef1b06988
parent 8118 7c1661c44c4a
child 8120 647514d55e9d
7014918: Improve core/minidump handling in Hotspot Summary: Added Minidump support on Windows, enabled large page core dumps when coredump_filter is present and writing out path/rlimit for core dumps. Reviewed-by: poonam, dsamersoff, sla, coleenp
hotspot/src/os/linux/vm/os_linux.cpp
hotspot/src/os/posix/vm/os_posix.cpp
hotspot/src/os/windows/vm/os_windows.cpp
hotspot/src/os/windows/vm/os_windows.hpp
hotspot/src/share/vm/runtime/globals.hpp
hotspot/src/share/vm/runtime/os.hpp
hotspot/src/share/vm/utilities/vmError.cpp
hotspot/src/share/vm/utilities/vmError.hpp
--- a/hotspot/src/os/linux/vm/os_linux.cpp	Tue Feb 08 22:27:57 2011 -0800
+++ b/hotspot/src/os/linux/vm/os_linux.cpp	Wed Feb 09 11:08:10 2011 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -131,6 +131,7 @@
 #define ALL_64_BITS CONST64(0xFFFFFFFFFFFFFFFF)
 #define SEC_IN_NANOSECS  1000000000LL
 
+#define LARGEPAGES_BIT (1 << 6)
 ////////////////////////////////////////////////////////////////////////////////
 // global variables
 julong os::Linux::_physical_memory = 0;
@@ -2817,6 +2818,43 @@
   return linux_mprotect(addr, size, PROT_READ|PROT_WRITE);
 }
 
+/*
+* Set the coredump_filter bits to include largepages in core dump (bit 6)
+*
+* From the coredump_filter documentation:
+*
+* - (bit 0) anonymous private memory
+* - (bit 1) anonymous shared memory
+* - (bit 2) file-backed private memory
+* - (bit 3) file-backed shared memory
+* - (bit 4) ELF header pages in file-backed private memory areas (it is
+*           effective only if the bit 2 is cleared)
+* - (bit 5) hugetlb private memory
+* - (bit 6) hugetlb shared memory
+*/
+static void set_coredump_filter(void) {
+  FILE *f;
+  long cdm;
+
+  if ((f = fopen("/proc/self/coredump_filter", "r+")) == NULL) {
+    return;
+  }
+
+  if (fscanf(f, "%lx", &cdm) != 1) {
+    fclose(f);
+    return;
+  }
+
+  rewind(f);
+
+  if ((cdm & LARGEPAGES_BIT) == 0) {
+    cdm |= LARGEPAGES_BIT;
+    fprintf(f, "%#lx", cdm);
+  }
+
+  fclose(f);
+}
+
 // Large page support
 
 static size_t _large_page_size = 0;
@@ -2874,6 +2912,8 @@
     _page_sizes[2] = 0;
   }
 
+  set_coredump_filter();
+
   // Large page support is available on 2.6 or newer kernel, some vendors
   // (e.g. Redhat) have backported it to their 2.4 based distributions.
   // We optimistically assume the support is available. If later it turns out
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os/posix/vm/os_posix.cpp	Wed Feb 09 11:08:10 2011 +0100
@@ -0,0 +1,61 @@
+/*
+* Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
+* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+*
+* This code is free software; you can redistribute it and/or modify it
+* under the terms of the GNU General Public License version 2 only, as
+* published by the Free Software Foundation.
+*
+* This code is distributed in the hope that it will be useful, but WITHOUT
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+* version 2 for more details (a copy is included in the LICENSE file that
+* accompanied this code).
+*
+* You should have received a copy of the GNU General Public License version
+* 2 along with this work; if not, write to the Free Software Foundation,
+* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+*
+* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+* or visit www.oracle.com if you need additional information or have any
+* questions.
+*
+*/
+
+#include "prims/jvm.h"
+#include "runtime/os.hpp"
+#include "utilities/vmError.hpp"
+
+#include <unistd.h>
+#include <sys/resource.h>
+
+// Check core dump limit and report possible place where core can be found
+void os::check_or_create_dump(void* exceptionRecord, void* contextRecord, char* buffer, size_t bufferSize) {
+  struct rlimit rlim;
+  static char cwd[O_BUFLEN];
+  bool success;
+
+  get_current_directory(cwd, sizeof(cwd));
+
+  if (getrlimit(RLIMIT_CORE, &rlim) != 0) {
+    jio_snprintf(buffer, bufferSize, "%s/core or core.%d (may not exist)", cwd, current_process_id());
+    success = true;
+  } else {
+    switch(rlim.rlim_cur) {
+      case RLIM_INFINITY:
+        jio_snprintf(buffer, bufferSize, "%s/core or core.%d", cwd, current_process_id());
+        success = true;
+        break;
+      case 0:
+        jio_snprintf(buffer, bufferSize, "Core dumps have been disabled. To enable core dumping, try \"ulimit -c unlimited\" before starting Java again");
+        success = false;
+        break;
+      default:
+        jio_snprintf(buffer, bufferSize, "%s/core or core.%d (max size %lu kB). To ensure a full core dump, try \"ulimit -c unlimited\" before starting Java again", cwd, current_process_id(), (unsigned long)(rlim.rlim_cur >> 10));
+        success = true;
+        break;
+    }
+  }
+  VMError::report_coredump_status(buffer, success);
+}
+
--- a/hotspot/src/os/windows/vm/os_windows.cpp	Tue Feb 08 22:27:57 2011 -0800
+++ b/hotspot/src/os/windows/vm/os_windows.cpp	Wed Feb 09 11:08:10 2011 +0100
@@ -1,5 +1,5 @@
 /*
- * CopyrighT (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -915,6 +915,80 @@
   }
 }
 
+
+static BOOL  (WINAPI *_MiniDumpWriteDump)  ( HANDLE, DWORD, HANDLE, MINIDUMP_TYPE, PMINIDUMP_EXCEPTION_INFORMATION,
+                                            PMINIDUMP_USER_STREAM_INFORMATION, PMINIDUMP_CALLBACK_INFORMATION);
+
+void os::check_or_create_dump(void* exceptionRecord, void* contextRecord, char* buffer, size_t bufferSize) {
+  HINSTANCE dbghelp;
+  EXCEPTION_POINTERS ep;
+  MINIDUMP_EXCEPTION_INFORMATION mei;
+  HANDLE hProcess = GetCurrentProcess();
+  DWORD processId = GetCurrentProcessId();
+  HANDLE dumpFile;
+  MINIDUMP_TYPE dumpType;
+  static const char* cwd;
+
+  // If running on a client version of Windows and user has not explicitly enabled dumping
+  if (!os::win32::is_windows_server() && !CreateMinidumpOnCrash) {
+    VMError::report_coredump_status("Minidumps are not enabled by default on client versions of Windows", false);
+    return;
+    // If running on a server version of Windows and user has explictly disabled dumping
+  } else if (os::win32::is_windows_server() && !FLAG_IS_DEFAULT(CreateMinidumpOnCrash) && !CreateMinidumpOnCrash) {
+    VMError::report_coredump_status("Minidump has been disabled from the command line", false);
+    return;
+  }
+
+  dbghelp = LoadLibrary("DBGHELP.DLL");
+
+  if (dbghelp == NULL) {
+    VMError::report_coredump_status("Failed to load dbghelp.dll", false);
+    return;
+  }
+
+  _MiniDumpWriteDump = CAST_TO_FN_PTR(
+    BOOL(WINAPI *)( HANDLE, DWORD, HANDLE, MINIDUMP_TYPE, PMINIDUMP_EXCEPTION_INFORMATION,
+    PMINIDUMP_USER_STREAM_INFORMATION, PMINIDUMP_CALLBACK_INFORMATION),
+    GetProcAddress(dbghelp, "MiniDumpWriteDump"));
+
+  if (_MiniDumpWriteDump == NULL) {
+    VMError::report_coredump_status("Failed to find MiniDumpWriteDump() in module dbghelp.dll", false);
+    return;
+  }
+
+  dumpType = (MINIDUMP_TYPE)(MiniDumpWithFullMemory | MiniDumpWithFullMemoryInfo |
+    MiniDumpWithHandleData | MiniDumpWithThreadInfo | MiniDumpWithUnloadedModules);
+
+
+  cwd = get_current_directory(NULL, 0);
+  jio_snprintf(buffer, bufferSize, "%s\\hs_err_pid%u.mdmp",cwd, current_process_id());
+  dumpFile = CreateFile(buffer, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
+
+  if (dumpFile == INVALID_HANDLE_VALUE) {
+    VMError::report_coredump_status("Failed to create file for dumping", false);
+    return;
+  }
+
+  ep.ContextRecord = (PCONTEXT) contextRecord;
+  ep.ExceptionRecord = (PEXCEPTION_RECORD) exceptionRecord;
+
+  mei.ThreadId = GetCurrentThreadId();
+  mei.ExceptionPointers = &ep;
+
+  // Older versions of dbghelp.dll (the one shipped with Win2003 for example) may not support all
+  // the dump types we really want. If first call fails, lets fall back to just use MiniDumpWithFullMemory then.
+  if (_MiniDumpWriteDump(hProcess, processId, dumpFile, dumpType, &mei, NULL, NULL) == false &&
+      _MiniDumpWriteDump(hProcess, processId, dumpFile, (MINIDUMP_TYPE)MiniDumpWithFullMemory, &mei, NULL, NULL) == false) {
+    VMError::report_coredump_status("Call to MiniDumpWriteDump() failed", false);
+  } else {
+    VMError::report_coredump_status(buffer, true);
+  }
+
+  CloseHandle(dumpFile);
+}
+
+
+
 void os::abort(bool dump_core)
 {
   os::shutdown();
@@ -3274,7 +3348,7 @@
 
 bool   os::win32::_is_nt              = false;
 bool   os::win32::_is_windows_2003    = false;
-
+bool   os::win32::_is_windows_server  = false;
 
 void os::win32::initialize_system_info() {
   SYSTEM_INFO si;
@@ -3293,9 +3367,9 @@
   GlobalMemoryStatusEx(&ms);
   _physical_memory = ms.ullTotalPhys;
 
-  OSVERSIONINFO oi;
-  oi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
-  GetVersionEx(&oi);
+  OSVERSIONINFOEX oi;
+  oi.dwOSVersionInfoSize = sizeof(OSVERSIONINFOEX);
+  GetVersionEx((OSVERSIONINFO*)&oi);
   switch(oi.dwPlatformId) {
     case VER_PLATFORM_WIN32_WINDOWS: _is_nt = false; break;
     case VER_PLATFORM_WIN32_NT:
@@ -3305,6 +3379,10 @@
         if (os_vers == 5002) {
           _is_windows_2003 = true;
         }
+        if (oi.wProductType == VER_NT_DOMAIN_CONTROLLER ||
+          oi.wProductType == VER_NT_SERVER) {
+            _is_windows_server = true;
+        }
       }
       break;
     default: fatal("Unknown platform");
--- a/hotspot/src/os/windows/vm/os_windows.hpp	Tue Feb 08 22:27:57 2011 -0800
+++ b/hotspot/src/os/windows/vm/os_windows.hpp	Wed Feb 09 11:08:10 2011 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -38,6 +38,7 @@
   static size_t _default_stack_size;
   static bool   _is_nt;
   static bool   _is_windows_2003;
+  static bool   _is_windows_server;
 
  public:
   // Windows-specific interface:
@@ -64,6 +65,9 @@
   // Tells whether the platform is NT or Windown95
   static bool is_nt() { return _is_nt; }
 
+  // Tells whether this is a server version of Windows
+  static bool is_windows_server() { return _is_windows_server; }
+
   // Tells whether the platform is Windows 2003
   static bool is_windows_2003() { return _is_windows_2003; }
 
--- a/hotspot/src/share/vm/runtime/globals.hpp	Tue Feb 08 22:27:57 2011 -0800
+++ b/hotspot/src/share/vm/runtime/globals.hpp	Wed Feb 09 11:08:10 2011 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -790,6 +790,9 @@
   product(bool, ShowMessageBoxOnError, false,                               \
           "Keep process alive on VM fatal error")                           \
                                                                             \
+  product(bool, CreateMinidumpOnCrash, false,                               \
+          "Create minidump on VM fatal error")                              \
+                                                                            \
   product_pd(bool, UseOSErrorReporting,                                     \
           "Let VM fatal error propagate to the OS (ie. WER on Windows)")    \
                                                                             \
--- a/hotspot/src/share/vm/runtime/os.hpp	Tue Feb 08 22:27:57 2011 -0800
+++ b/hotspot/src/share/vm/runtime/os.hpp	Wed Feb 09 11:08:10 2011 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -614,6 +614,9 @@
   // Structured OS Exception support
   static void os_exception_wrapper(java_call_t f, JavaValue* value, methodHandle* method, JavaCallArguments* args, Thread* thread);
 
+  // On Windows this will create an actual minidump, on Linux/Solaris it will simply check core dump limits
+  static void check_or_create_dump(void* exceptionRecord, void* contextRecord, char* buffer, size_t bufferSize);
+
   // JVMTI & JVM monitoring and management support
   // The thread_cpu_time() and current_thread_cpu_time() are only
   // supported if is_thread_cpu_time_supported() returns true.
--- a/hotspot/src/share/vm/utilities/vmError.cpp	Tue Feb 08 22:27:57 2011 -0800
+++ b/hotspot/src/share/vm/utilities/vmError.cpp	Wed Feb 09 11:08:10 2011 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -201,6 +201,15 @@
   out->print_raw_cr("#");
 }
 
+bool VMError::coredump_status;
+char VMError::coredump_message[O_BUFLEN];
+
+void VMError::report_coredump_status(const char* message, bool status) {
+  coredump_status = status;
+  strncpy(coredump_message, message, sizeof(coredump_message));
+  coredump_message[sizeof(coredump_message)-1] = 0;
+}
+
 
 // Return a string to describe the error
 char* VMError::error_string(char* buf, int buflen) {
@@ -454,6 +463,15 @@
        st->cr();
        st->print_cr("#");
      }
+  STEP(63, "(printing core file information)")
+    st->print("# ");
+    if (coredump_status) {
+      st->print("Core dump written. Default location: %s", coredump_message);
+    } else {
+      st->print("Failed to write core dump. %s", coredump_message);
+    }
+    st->print_cr("");
+    st->print_cr("#");
 
   STEP(65, "(printing bug submit message)")
 
@@ -792,6 +810,9 @@
       ShowMessageBoxOnError = false;
     }
 
+    // Write a minidump on Windows, check core dump limits on Linux/Solaris
+    os::check_or_create_dump(_siginfo, _context, buffer, sizeof(buffer));
+
     // reset signal handlers or exception filter; make sure recursive crashes
     // are handled properly.
     reset_signal_handlers();
--- a/hotspot/src/share/vm/utilities/vmError.hpp	Tue Feb 08 22:27:57 2011 -0800
+++ b/hotspot/src/share/vm/utilities/vmError.hpp	Wed Feb 09 11:08:10 2011 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -67,6 +67,14 @@
   static VMError* volatile first_error;
   static volatile jlong    first_error_tid;
 
+  // Core dump status, false if we have been unable to write a core/minidump for some reason
+  static bool coredump_status;
+
+  // When coredump_status is set to true this will contain the name/path to the core/minidump,
+  // if coredump_status if false, this will (hopefully) contain a useful error explaining why
+  // no core/minidump has been written to disk
+  static char coredump_message[O_BUFLEN];
+
   // used by reporting about OOM
   size_t       _size;
 
@@ -106,6 +114,9 @@
   // return a string to describe the error
   char *error_string(char* buf, int buflen);
 
+  // Report status of core/minidump
+  static void report_coredump_status(const char* message, bool status);
+
   // main error reporting function
   void report_and_die();