8188872: runtime/ErrorHandling/TimeoutInErrorHandlingTest.java fails intermittently
authordcubed
Wed, 05 Jun 2019 14:07:14 -0400
changeset 55239 74832e7b5cad
parent 55238 3bd979f09721
child 55240 57b93b113ec0
8188872: runtime/ErrorHandling/TimeoutInErrorHandlingTest.java fails intermittently Reviewed-by: dholmes, stuefe
src/hotspot/share/utilities/vmError.cpp
src/hotspot/share/utilities/vmError.hpp
test/hotspot/jtreg/runtime/ErrorHandling/TimeoutInErrorHandlingTest.java
--- a/src/hotspot/share/utilities/vmError.cpp	Wed Jun 05 14:01:01 2019 -0400
+++ b/src/hotspot/share/utilities/vmError.cpp	Wed Jun 05 14:07:14 2019 -0400
@@ -406,12 +406,16 @@
   return Atomic::load(&_step_start_time);
 }
 
+void VMError::clear_step_start_time() {
+  return Atomic::store((jlong)0, &_step_start_time);
+}
+
 void VMError::report(outputStream* st, bool _verbose) {
 
 # define BEGIN if (_current_step == 0) { _current_step = __LINE__;
 # define STEP(s) } if (_current_step < __LINE__) { _current_step = __LINE__; _current_step_info = s; \
   record_step_start_time(); _step_did_timeout = false;
-# define END }
+# define END clear_step_start_time(); }
 
   // don't allocate large buffer on stack
   static char buf[O_BUFLEN];
@@ -451,6 +455,15 @@
   // Step to global timeout ratio is 4:1, so in order to be absolutely sure we hit the
   // global timeout, let's execute the timeout step five times.
   // See corresponding test in test/runtime/ErrorHandling/TimeoutInErrorHandlingTest.java
+  STEP("setup for test unresponsive error reporting step")
+    if (_verbose && TestUnresponsiveErrorHandler) {
+      // We record reporting_start_time for this test here because we
+      // care about the time spent executing TIMEOUT_TEST_STEP and not
+      // about the time it took us to get here.
+      tty->print_cr("Recording reporting_start_time for TestUnresponsiveErrorHandler.");
+      record_reporting_start_time();
+    }
+
   #define TIMEOUT_TEST_STEP STEP("test unresponsive error reporting step") \
     if (_verbose && TestUnresponsiveErrorHandler) { os::infinite_sleep(); }
   TIMEOUT_TEST_STEP
@@ -1358,7 +1371,14 @@
     _error_reported = true;
 
     reporting_started();
-    record_reporting_start_time();
+    if (!TestUnresponsiveErrorHandler) {
+      // Record reporting_start_time unless we're running the
+      // TestUnresponsiveErrorHandler test. For that test we record
+      // reporting_start_time at the beginning of the test.
+      record_reporting_start_time();
+    } else {
+      out.print_raw_cr("Delaying recording reporting_start_time for TestUnresponsiveErrorHandler.");
+    }
 
     if (ShowMessageBoxOnError || PauseAtExit) {
       show_message_box(buffer, sizeof(buffer));
@@ -1665,7 +1685,9 @@
   // Timestamp is stored in nanos.
   if (reporting_start_time_l > 0) {
     const jlong end = reporting_start_time_l + (jlong)ErrorLogTimeout * TIMESTAMP_TO_SECONDS_FACTOR;
-    if (end <= now) {
+    if (end <= now && !_reporting_did_timeout) {
+      // We hit ErrorLogTimeout and we haven't interrupted the reporting
+      // thread yet.
       _reporting_did_timeout = true;
       interrupt_reporting_thread();
       return true; // global timeout
@@ -1678,7 +1700,9 @@
     // hang for some reason, so this simple rule allows for three hanging step and still
     // hopefully leaves time enough for the rest of the steps to finish.
     const jlong end = step_start_time_l + (jlong)ErrorLogTimeout * TIMESTAMP_TO_SECONDS_FACTOR / 4;
-    if (end <= now) {
+    if (end <= now && !_step_did_timeout) {
+      // The step timed out and we haven't interrupted the reporting
+      // thread yet.
       _step_did_timeout = true;
       interrupt_reporting_thread();
       return false; // (Not a global timeout)
--- a/src/hotspot/share/utilities/vmError.hpp	Wed Jun 05 14:01:01 2019 -0400
+++ b/src/hotspot/share/utilities/vmError.hpp	Wed Jun 05 14:07:14 2019 -0400
@@ -135,6 +135,7 @@
   static jlong get_reporting_start_time();
   static void record_step_start_time();
   static jlong get_step_start_time();
+  static void clear_step_start_time();
 
 public:
 
--- a/test/hotspot/jtreg/runtime/ErrorHandling/TimeoutInErrorHandlingTest.java	Wed Jun 05 14:01:01 2019 -0400
+++ b/test/hotspot/jtreg/runtime/ErrorHandling/TimeoutInErrorHandlingTest.java	Wed Jun 05 14:07:14 2019 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -43,9 +43,18 @@
 
 public class TimeoutInErrorHandlingTest {
 
+    public static final boolean verbose = System.getProperty("verbose") != null;
+    // 16 seconds for hs_err generation timeout = 4 seconds per step timeout
+    public static final int ERROR_LOG_TIMEOUT = 16;
 
     public static void main(String[] args) throws Exception {
 
+        int error_log_timeout = ERROR_LOG_TIMEOUT;
+        if ("SunOS".equals(System.getProperty("os.name"))) {
+            // Give Solaris machines 3X as much time:
+            error_log_timeout *= 3;
+        }
+
         /* Start the VM and let it crash. Specify TestUnresponsiveErrorHandler which will
          * let five subsequent error reporting steps hang. The Timeout handling triggered
          * by the WatcherThread should kick in and interrupt those steps. In theory, the
@@ -72,12 +81,18 @@
             "-Xmx100M",
             "-XX:ErrorHandlerTest=14",
             "-XX:+TestUnresponsiveErrorHandler",
-            "-XX:ErrorLogTimeout=16", // 16 seconds big timeout = 4 seconds per little timeout
+            "-XX:ErrorLogTimeout=" + error_log_timeout,
             "-XX:-CreateCoredumpOnCrash",
             "-version");
 
         OutputAnalyzer output_detail = new OutputAnalyzer(pb.start());
 
+        if (verbose) {
+            System.err.println("<begin cmd output>");
+            System.err.println(output_detail.getOutput());
+            System.err.println("<end cmd output>");
+        }
+
         // we should have crashed with a SIGSEGV
         output_detail.shouldMatch("# A fatal error has been detected by the Java Runtime Environment:.*");
         output_detail.shouldMatch("# +(?:SIGSEGV|EXCEPTION_ACCESS_VIOLATION).*");
@@ -88,11 +103,21 @@
         // extract hs-err file
         String hs_err_file = output_detail.firstMatch("# *(\\S*hs_err_pid\\d+\\.log)", 1);
         if (hs_err_file == null) {
+            if (!verbose) {
+                System.err.println("<begin cmd output>");
+                System.err.println(output_detail.getOutput());
+                System.err.println("<end cmd output>");
+            }
             throw new RuntimeException("Did not find hs-err file in output.\n");
         }
 
         File f = new File(hs_err_file);
         if (!f.exists()) {
+            if (!verbose) {
+                System.err.println("<begin cmd output>");
+                System.err.println(output_detail.getOutput());
+                System.err.println("<end cmd output>");
+            }
             throw new RuntimeException("hs-err file missing at "
                 + f.getAbsolutePath() + ".\n");
         }
@@ -104,7 +129,6 @@
         String line = null;
 
 
-
         Pattern [] pattern = new Pattern[] {
             Pattern.compile(".*timeout occurred during error reporting in step.*"),
             Pattern.compile(".*timeout occurred during error reporting in step.*")
@@ -112,18 +136,31 @@
         int currentPattern = 0;
 
         String lastLine = null;
+        StringBuilder saved_hs_err = new StringBuilder();
         while ((line = br.readLine()) != null) {
+            saved_hs_err.append(line + System.lineSeparator());
             if (currentPattern < pattern.length) {
-              if (pattern[currentPattern].matcher(line).matches()) {
-                System.out.println("Found: " + line + ".");
-                currentPattern ++;
-              }
+                if (pattern[currentPattern].matcher(line).matches()) {
+                    System.out.println("Found: " + line + ".");
+                    currentPattern ++;
+                }
             }
             lastLine = line;
         }
         br.close();
 
+        if (verbose) {
+            System.err.println("<begin hs_err contents>");
+            System.err.print(saved_hs_err);
+            System.err.println("<end hs_err contents>");
+        }
+
         if (currentPattern < pattern.length) {
+            if (!verbose) {
+                System.err.println("<begin hs_err contents>");
+                System.err.print(saved_hs_err);
+                System.err.println("<end hs_err contents>");
+            }
             throw new RuntimeException("hs-err file incomplete (first missing pattern: " +  currentPattern + ")");
         }
 
@@ -132,4 +169,3 @@
     }
 
 }
-