8181143: Introduce diagnostic flag to abort VM on too long VM operations
Reviewed-by: rkennke, zgu, dholmes, stuefe, rehn
--- a/src/hotspot/share/runtime/globals.hpp Thu Dec 13 10:35:09 2018 -0500
+++ b/src/hotspot/share/runtime/globals.hpp Thu Dec 13 16:45:24 2018 +0100
@@ -501,6 +501,13 @@
diagnostic(bool, AbortVMOnSafepointTimeout, false, \
"Abort upon failure to reach safepoint (see SafepointTimeout)") \
\
+ diagnostic(bool, AbortVMOnVMOperationTimeout, false, \
+ "Abort upon failure to complete VM operation promptly") \
+ \
+ diagnostic(intx, AbortVMOnVMOperationTimeoutDelay, 1000, \
+ "Delay in milliseconds for option AbortVMOnVMOperationTimeout") \
+ range(0, max_intx) \
+ \
/* 50 retries * (5 * current_retry_count) millis = ~6.375 seconds */ \
/* typically, at most a few retries are needed */ \
product(intx, SuspendRetryCount, 50, \
--- a/src/hotspot/share/runtime/vmThread.cpp Thu Dec 13 10:35:09 2018 -0500
+++ b/src/hotspot/share/runtime/vmThread.cpp Thu Dec 13 16:45:24 2018 +0100
@@ -28,6 +28,7 @@
#include "jfr/jfrEvents.hpp"
#include "jfr/support/jfrThreadId.hpp"
#include "logging/log.hpp"
+#include "logging/logStream.hpp"
#include "logging/logConfiguration.hpp"
#include "memory/resourceArea.hpp"
#include "oops/method.hpp"
@@ -197,6 +198,32 @@
drain_list_oops_do(f);
}
+//------------------------------------------------------------------------------------------------------------------
+// Timeout machinery
+
+void VMOperationTimeoutTask::task() {
+ assert(AbortVMOnVMOperationTimeout, "only if enabled");
+ if (is_armed()) {
+ jlong delay = (os::javaTimeMillis() - _arm_time);
+ if (delay > AbortVMOnVMOperationTimeoutDelay) {
+ fatal("VM operation took too long: " SIZE_FORMAT " ms (timeout: " SIZE_FORMAT " ms)",
+ delay, AbortVMOnVMOperationTimeoutDelay);
+ }
+ }
+}
+
+bool VMOperationTimeoutTask::is_armed() {
+ return OrderAccess::load_acquire(&_armed) != 0;
+}
+
+void VMOperationTimeoutTask::arm() {
+ _arm_time = os::javaTimeMillis();
+ OrderAccess::release_store_fence(&_armed, 1);
+}
+
+void VMOperationTimeoutTask::disarm() {
+ OrderAccess::release_store_fence(&_armed, 0);
+}
//------------------------------------------------------------------------------------------------------------------
// Implementation of VMThread stuff
@@ -209,12 +236,28 @@
VMOperationQueue* VMThread::_vm_queue = NULL;
PerfCounter* VMThread::_perf_accumulated_vm_operation_time = NULL;
const char* VMThread::_no_op_reason = NULL;
+VMOperationTimeoutTask* VMThread::_timeout_task = NULL;
void VMThread::create() {
assert(vm_thread() == NULL, "we can only allocate one VMThread");
_vm_thread = new VMThread();
+ if (AbortVMOnVMOperationTimeout) {
+ // Make sure we call the timeout task frequently enough, but not too frequent.
+ // Try to make the interval 10% of the timeout delay, so that we miss the timeout
+ // by those 10% at max. Periodic task also expects it to fit min/max intervals.
+ size_t interval = (size_t)AbortVMOnVMOperationTimeoutDelay / 10;
+ interval = interval / PeriodicTask::interval_gran * PeriodicTask::interval_gran;
+ interval = MAX2<size_t>(interval, PeriodicTask::min_interval);
+ interval = MIN2<size_t>(interval, PeriodicTask::max_interval);
+
+ _timeout_task = new VMOperationTimeoutTask(interval);
+ _timeout_task->enroll();
+ } else {
+ assert(_timeout_task == NULL, "sanity");
+ }
+
// Create VM operation queue
_vm_queue = new VMOperationQueue();
guarantee(_vm_queue != NULL, "just checking");
@@ -492,6 +535,11 @@
_vm_queue->set_drain_list(safepoint_ops); // ensure ops can be scanned
SafepointSynchronize::begin();
+
+ if (_timeout_task != NULL) {
+ _timeout_task->arm();
+ }
+
evaluate_operation(_cur_vm_operation);
// now process all queued safepoint ops, iteratively draining
// the queue until there are none left
@@ -533,6 +581,10 @@
_vm_queue->set_drain_list(NULL);
+ if (_timeout_task != NULL) {
+ _timeout_task->disarm();
+ }
+
// Complete safepoint synchronization
SafepointSynchronize::end();
--- a/src/hotspot/share/runtime/vmThread.hpp Thu Dec 13 10:35:09 2018 -0500
+++ b/src/hotspot/share/runtime/vmThread.hpp Thu Dec 13 16:45:24 2018 +0100
@@ -27,6 +27,7 @@
#include "runtime/perfData.hpp"
#include "runtime/thread.hpp"
+#include "runtime/task.hpp"
#include "runtime/vmOperations.hpp"
//
@@ -84,6 +85,26 @@
};
+// VM operation timeout handling: warn or abort the VM when VM operation takes
+// too long. Periodic tasks do not participate in safepoint protocol, and therefore
+// can fire when application threads are stopped.
+
+class VMOperationTimeoutTask : public PeriodicTask {
+private:
+ volatile int _armed;
+ jlong _arm_time;
+
+public:
+ VMOperationTimeoutTask(size_t interval_time) :
+ PeriodicTask(interval_time), _armed(0), _arm_time(0) {}
+
+ virtual void task();
+
+ bool is_armed();
+ void arm();
+ void disarm();
+};
+
//
// A single VMThread (the primordial thread) spawns all other threads
// and is itself used by other threads to offload heavy vm operations
@@ -101,6 +122,8 @@
static const char* _no_op_reason;
+ static VMOperationTimeoutTask* _timeout_task;
+
static bool no_op_safepoint_needed(bool check_time);
void evaluate_operation(VM_Operation* op);
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/runtime/Safepoint/TestAbortOnVMOperationTimeout.java Thu Dec 13 16:45:24 2018 +0100
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2018, Red Hat, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import jdk.test.lib.*;
+import jdk.test.lib.process.*;
+
+/*
+ * @test TestAbortOnVMOperationTimeout
+ * @bug 8181143
+ * @summary Check abort on VM timeout is working
+ * @library /test/lib
+ * @modules java.base/jdk.internal.misc
+ * java.management
+ */
+
+public class TestAbortOnVMOperationTimeout {
+
+ public static void main(String[] args) throws Exception {
+ if (args.length > 0) {
+ Object[] arr = new Object[10_000_000];
+ for (int i = 0; i < arr.length; i++) {
+ arr[i] = new Object();
+ }
+ return;
+ }
+
+ // These should definitely pass: more than a minute is enough for Serial to act.
+ // The values are deliberately non-round to trip off periodic task granularity.
+ for (int delay : new int[]{63423, 12388131}) {
+ testWith(delay, true);
+ }
+
+ // These should fail: Serial is not very fast. Traversing 10M objects in 5 ms
+ // means less than 0.5 ns per object, which is not doable.
+ for (int delay : new int[]{0, 1, 5}) {
+ testWith(delay, false);
+ }
+ }
+
+ public static void testWith(int delay, boolean shouldPass) throws Exception {
+ ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(
+ "-XX:+UnlockDiagnosticVMOptions",
+ "-XX:+AbortVMOnVMOperationTimeout",
+ "-XX:AbortVMOnVMOperationTimeoutDelay=" + delay,
+ "-Xmx256m",
+ "-XX:+UseSerialGC",
+ "-XX:-CreateCoredumpOnCrash",
+ "TestAbortOnVMOperationTimeout",
+ "foo"
+ );
+
+ OutputAnalyzer output = new OutputAnalyzer(pb.start());
+ if (shouldPass) {
+ output.shouldHaveExitValue(0);
+ } else {
+ output.shouldMatch("VM operation took too long");
+ output.shouldNotHaveExitValue(0);
+ }
+ }
+}
+