--- a/src/hotspot/share/utilities/vmError.cpp Thu Oct 17 20:27:44 2019 +0100
+++ b/src/hotspot/share/utilities/vmError.cpp Thu Oct 17 20:53:35 2019 +0100
@@ -406,12 +406,16 @@
return Atomic::load(&_step_start_time);
}
+// Atomically resets the recorded start time of the current reporting step
+// to zero. The END macro in VMError::report() invokes this after the last
+// STEP has finished.
+void VMError::clear_step_start_time() {
+  Atomic::store(static_cast<jlong>(0), &_step_start_time);
+}
+
void VMError::report(outputStream* st, bool _verbose) {
# define BEGIN if (_current_step == 0) { _current_step = __LINE__;
# define STEP(s) } if (_current_step < __LINE__) { _current_step = __LINE__; _current_step_info = s; \
record_step_start_time(); _step_did_timeout = false;
-# define END }
+# define END clear_step_start_time(); }
// don't allocate large buffer on stack
static char buf[O_BUFLEN];
@@ -451,6 +455,15 @@
// Step to global timeout ratio is 4:1, so in order to be absolutely sure we hit the
// global timeout, let's execute the timeout step five times.
// See corresponding test in test/runtime/ErrorHandling/TimeoutInErrorHandlingTest.java
+ STEP("setup for test unresponsive error reporting step")
+ if (_verbose && TestUnresponsiveErrorHandler) {
+ // We record reporting_start_time for this test here because we
+ // care about the time spent executing TIMEOUT_TEST_STEP and not
+ // about the time it took us to get here.
+ tty->print_cr("Recording reporting_start_time for TestUnresponsiveErrorHandler.");
+ record_reporting_start_time();
+ }
+
#define TIMEOUT_TEST_STEP STEP("test unresponsive error reporting step") \
if (_verbose && TestUnresponsiveErrorHandler) { os::infinite_sleep(); }
TIMEOUT_TEST_STEP
@@ -1192,7 +1205,7 @@
st->print_cr("END.");
}
-volatile intptr_t VMError::first_error_tid = -1;
+volatile intptr_t VMError::_first_error_tid = -1;
/** Expand a pattern into a buffer starting at pos and open a file using constructed path */
static int expand_and_open(const char* pattern, bool overwrite_existing, char* buf, size_t buflen, size_t pos) {
@@ -1314,6 +1327,12 @@
// File descriptor to the error log file.
static int fd_log = -1;
+#ifdef CAN_SHOW_REGISTERS_ON_ASSERT
+ // Disarm assertion poison page, since from this point on we do not need this mechanism anymore and it may
+ // cause problems in error handling during native OOM, see JDK-8227275.
+ disarm_assert_poison();
+#endif
+
// Use local fdStream objects only. Do not use global instances whose initialization
// relies on dynamic initialization (see JDK-8214975). Do not rely on these instances
// to carry over into recursions or invocations from other threads.
@@ -1336,8 +1355,8 @@
os::abort(CreateCoredumpOnCrash);
}
intptr_t mytid = os::current_thread_id();
- if (first_error_tid == -1 &&
- Atomic::cmpxchg(mytid, &first_error_tid, (intptr_t)-1) == -1) {
+ if (_first_error_tid == -1 &&
+ Atomic::cmpxchg(mytid, &_first_error_tid, (intptr_t)-1) == -1) {
// Initialize time stamps to use the same base.
out.time_stamp().update_to(1);
@@ -1358,7 +1377,14 @@
_error_reported = true;
reporting_started();
- record_reporting_start_time();
+ if (!TestUnresponsiveErrorHandler) {
+ // Record reporting_start_time unless we're running the
+ // TestUnresponsiveErrorHandler test. For that test we record
+ // reporting_start_time at the beginning of the test.
+ record_reporting_start_time();
+ } else {
+ out.print_raw_cr("Delaying recording reporting_start_time for TestUnresponsiveErrorHandler.");
+ }
if (ShowMessageBoxOnError || PauseAtExit) {
show_message_box(buffer, sizeof(buffer));
@@ -1390,7 +1416,7 @@
// This is not the first error, see if it happened in a different thread
// or in the same thread during error reporting.
- if (first_error_tid != mytid) {
+ if (_first_error_tid != mytid) {
char msgbuf[64];
jio_snprintf(msgbuf, sizeof(msgbuf),
"[thread " INTX_FORMAT " also had an error]",
@@ -1503,6 +1529,11 @@
log.set_fd(-1);
}
+ if (PrintNMTStatistics) {
+ fdStream fds(fd_out);
+ MemTracker::final_report(&fds);
+ }
+
static bool skip_replay = ReplayCompiles; // Do not overwrite file during replay
if (DumpReplayDataOnError && _thread && _thread->is_Compiler_thread() && !skip_replay) {
skip_replay = true;
@@ -1665,7 +1696,9 @@
// Timestamp is stored in nanos.
if (reporting_start_time_l > 0) {
const jlong end = reporting_start_time_l + (jlong)ErrorLogTimeout * TIMESTAMP_TO_SECONDS_FACTOR;
- if (end <= now) {
+ if (end <= now && !_reporting_did_timeout) {
+ // We hit ErrorLogTimeout and we haven't interrupted the reporting
+ // thread yet.
_reporting_did_timeout = true;
interrupt_reporting_thread();
return true; // global timeout
@@ -1678,7 +1711,9 @@
// hang for some reason, so this simple rule allows for three hanging step and still
// hopefully leaves time enough for the rest of the steps to finish.
const jlong end = step_start_time_l + (jlong)ErrorLogTimeout * TIMESTAMP_TO_SECONDS_FACTOR / 4;
- if (end <= now) {
+ if (end <= now && !_step_did_timeout) {
+ // The step timed out and we haven't interrupted the reporting
+ // thread yet.
_step_did_timeout = true;
interrupt_reporting_thread();
return false; // (Not a global timeout)
@@ -1762,11 +1797,14 @@
// Case 16 is tested by test/hotspot/jtreg/runtime/ErrorHandling/ThreadsListHandleInErrorHandlingTest.java.
// Case 17 is tested by test/hotspot/jtreg/runtime/ErrorHandling/NestedThreadsListHandleInErrorHandlingTest.java.
- // We grab Threads_lock to keep ThreadsSMRSupport::print_info_on()
+ // We try to grab Threads_lock to keep ThreadsSMRSupport::print_info_on()
// from racing with Threads::add() or Threads::remove() as we
// generate the hs_err_pid file. This makes our ErrorHandling tests
// more stable.
- MutexLocker ml(Threads_lock->owned_by_self() ? NULL : Threads_lock, Mutex::_no_safepoint_check_flag);
+ if (!Threads_lock->owned_by_self()) {
+ Threads_lock->try_lock();
+ // The VM is going to die so no need to unlock Threads_lock.
+ }
switch (how) {
case 1: vmassert(str == NULL, "expected null"); break;