8145725: Remove the WorkAroundNPTLTimedWaitHang workaround
authordholmes
Wed, 10 Feb 2016 18:57:19 -0500
changeset 36087 c7770c4909d0
parent 36085 222ab7d1a9bf
child 36088 d203645e9ccb
8145725: Remove the WorkAroundNPTLTimedWaitHang workaround Reviewed-by: ddmitriev, stuefe, dcubed
hotspot/src/os/bsd/vm/os_bsd.cpp
hotspot/src/os/linux/vm/os_linux.cpp
hotspot/src/share/vm/runtime/globals.hpp
--- a/hotspot/src/os/bsd/vm/os_bsd.cpp	Wed Feb 10 14:30:25 2016 +0100
+++ b/hotspot/src/os/bsd/vm/os_bsd.cpp	Wed Feb 10 18:57:19 2016 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -4042,61 +4042,6 @@
 //        could have been signaled after a wait started
 //    1 : signaled - thread is running or ready
 //
-// Beware -- Some versions of NPTL embody a flaw where pthread_cond_timedwait() can
-// hang indefinitely.  For instance NPTL 0.60 on 2.4.21-4ELsmp is vulnerable.
-// For specifics regarding the bug see GLIBC BUGID 261237 :
-//    http://www.mail-archive.com/debian-glibc@lists.debian.org/msg10837.html.
-// Briefly, pthread_cond_timedwait() calls with an expiry time that's not in the future
-// will either hang or corrupt the condvar, resulting in subsequent hangs if the condvar
-// is used.  (The simple C test-case provided in the GLIBC bug report manifests the
-// hang).  The JVM is vulernable via sleep(), Object.wait(timo), LockSupport.parkNanos()
-// and monitorenter when we're using 1-0 locking.  All those operations may result in
-// calls to pthread_cond_timedwait().  Using LD_ASSUME_KERNEL to use an older version
-// of libpthread avoids the problem, but isn't practical.
-//
-// Possible remedies:
-//
-// 1.   Establish a minimum relative wait time.  50 to 100 msecs seems to work.
-//      This is palliative and probabilistic, however.  If the thread is preempted
-//      between the call to compute_abstime() and pthread_cond_timedwait(), more
-//      than the minimum period may have passed, and the abstime may be stale (in the
-//      past) resultin in a hang.   Using this technique reduces the odds of a hang
-//      but the JVM is still vulnerable, particularly on heavily loaded systems.
-//
-// 2.   Modify park-unpark to use per-thread (per ParkEvent) pipe-pairs instead
-//      of the usual flag-condvar-mutex idiom.  The write side of the pipe is set
-//      NDELAY. unpark() reduces to write(), park() reduces to read() and park(timo)
-//      reduces to poll()+read().  This works well, but consumes 2 FDs per extant
-//      thread.
-//
-// 3.   Embargo pthread_cond_timedwait() and implement a native "chron" thread
-//      that manages timeouts.  We'd emulate pthread_cond_timedwait() by enqueuing
-//      a timeout request to the chron thread and then blocking via pthread_cond_wait().
-//      This also works well.  In fact it avoids kernel-level scalability impediments
-//      on certain platforms that don't handle lots of active pthread_cond_timedwait()
-//      timers in a graceful fashion.
-//
-// 4.   When the abstime value is in the past it appears that control returns
-//      correctly from pthread_cond_timedwait(), but the condvar is left corrupt.
-//      Subsequent timedwait/wait calls may hang indefinitely.  Given that, we
-//      can avoid the problem by reinitializing the condvar -- by cond_destroy()
-//      followed by cond_init() -- after all calls to pthread_cond_timedwait().
-//      It may be possible to avoid reinitialization by checking the return
-//      value from pthread_cond_timedwait().  In addition to reinitializing the
-//      condvar we must establish the invariant that cond_signal() is only called
-//      within critical sections protected by the adjunct mutex.  This prevents
-//      cond_signal() from "seeing" a condvar that's in the midst of being
-//      reinitialized or that is corrupt.  Sadly, this invariant obviates the
-//      desirable signal-after-unlock optimization that avoids futile context switching.
-//
-//      I'm also concerned that some versions of NTPL might allocate an auxilliary
-//      structure when a condvar is used or initialized.  cond_destroy()  would
-//      release the helper structure.  Our reinitialize-after-timedwait fix
-//      put excessive stress on malloc/free and locks protecting the c-heap.
-//
-// We currently use (4).  See the WorkAroundNTPLTimedWaitHang flag.
-// It may be possible to refine (4) by checking the kernel and NTPL verisons
-// and only enabling the work-around for vulnerable environments.
 
 // utility to compute the abstime argument to timedwait:
 // millis is the relative timeout time
@@ -4208,10 +4153,6 @@
 
   while (_Event < 0) {
     status = pthread_cond_timedwait(_cond, _mutex, &abst);
-    if (status != 0 && WorkAroundNPTLTimedWaitHang) {
-      pthread_cond_destroy(_cond);
-      pthread_cond_init(_cond, NULL);
-    }
     assert_status(status == 0 || status == EINTR ||
                   status == ETIMEDOUT,
                   status, "cond_timedwait");
@@ -4255,10 +4196,6 @@
   assert_status(status == 0, status, "mutex_lock");
   int AnyWaiters = _nParked;
   assert(AnyWaiters == 0 || AnyWaiters == 1, "invariant");
-  if (AnyWaiters != 0 && WorkAroundNPTLTimedWaitHang) {
-    AnyWaiters = 0;
-    pthread_cond_signal(_cond);
-  }
   status = pthread_mutex_unlock(_mutex);
   assert_status(status == 0, status, "mutex_unlock");
   if (AnyWaiters != 0) {
@@ -4391,7 +4328,7 @@
   if (_counter > 0)  { // no wait needed
     _counter = 0;
     status = pthread_mutex_unlock(_mutex);
-    assert(status == 0, "invariant");
+    assert_status(status == 0, status, "invariant");
     // Paranoia to ensure our locked and lock-free paths interact
     // correctly with each other and Java-level accesses.
     OrderAccess::fence();
@@ -4414,10 +4351,6 @@
     status = pthread_cond_wait(_cond, _mutex);
   } else {
     status = pthread_cond_timedwait(_cond, _mutex, &absTime);
-    if (status != 0 && WorkAroundNPTLTimedWaitHang) {
-      pthread_cond_destroy(_cond);
-      pthread_cond_init(_cond, NULL);
-    }
   }
   assert_status(status == 0 || status == EINTR ||
                 status == ETIMEDOUT,
@@ -4442,24 +4375,14 @@
 
 void Parker::unpark() {
   int status = pthread_mutex_lock(_mutex);
-  assert(status == 0, "invariant");
+  assert_status(status == 0, status, "invariant");
   const int s = _counter;
   _counter = 1;
+  status = pthread_mutex_unlock(_mutex);
+  assert_status(status == 0, status, "invariant");
   if (s < 1) {
-    if (WorkAroundNPTLTimedWaitHang) {
-      status = pthread_cond_signal(_cond);
-      assert(status == 0, "invariant");
-      status = pthread_mutex_unlock(_mutex);
-      assert(status == 0, "invariant");
-    } else {
-      status = pthread_mutex_unlock(_mutex);
-      assert(status == 0, "invariant");
-      status = pthread_cond_signal(_cond);
-      assert(status == 0, "invariant");
-    }
-  } else {
-    pthread_mutex_unlock(_mutex);
-    assert(status == 0, "invariant");
+    status = pthread_cond_signal(_cond);
+    assert_status(status == 0, status, "invariant");
   }
 }
 
--- a/hotspot/src/os/linux/vm/os_linux.cpp	Wed Feb 10 14:30:25 2016 +0100
+++ b/hotspot/src/os/linux/vm/os_linux.cpp	Wed Feb 10 18:57:19 2016 -0500
@@ -5349,61 +5349,6 @@
 //        could have been signaled after a wait started
 //    1 : signaled - thread is running or ready
 //
-// Beware -- Some versions of NPTL embody a flaw where pthread_cond_timedwait() can
-// hang indefinitely.  For instance NPTL 0.60 on 2.4.21-4ELsmp is vulnerable.
-// For specifics regarding the bug see GLIBC BUGID 261237 :
-//    http://www.mail-archive.com/debian-glibc@lists.debian.org/msg10837.html.
-// Briefly, pthread_cond_timedwait() calls with an expiry time that's not in the future
-// will either hang or corrupt the condvar, resulting in subsequent hangs if the condvar
-// is used.  (The simple C test-case provided in the GLIBC bug report manifests the
-// hang).  The JVM is vulernable via sleep(), Object.wait(timo), LockSupport.parkNanos()
-// and monitorenter when we're using 1-0 locking.  All those operations may result in
-// calls to pthread_cond_timedwait().  Using LD_ASSUME_KERNEL to use an older version
-// of libpthread avoids the problem, but isn't practical.
-//
-// Possible remedies:
-//
-// 1.   Establish a minimum relative wait time.  50 to 100 msecs seems to work.
-//      This is palliative and probabilistic, however.  If the thread is preempted
-//      between the call to compute_abstime() and pthread_cond_timedwait(), more
-//      than the minimum period may have passed, and the abstime may be stale (in the
-//      past) resultin in a hang.   Using this technique reduces the odds of a hang
-//      but the JVM is still vulnerable, particularly on heavily loaded systems.
-//
-// 2.   Modify park-unpark to use per-thread (per ParkEvent) pipe-pairs instead
-//      of the usual flag-condvar-mutex idiom.  The write side of the pipe is set
-//      NDELAY. unpark() reduces to write(), park() reduces to read() and park(timo)
-//      reduces to poll()+read().  This works well, but consumes 2 FDs per extant
-//      thread.
-//
-// 3.   Embargo pthread_cond_timedwait() and implement a native "chron" thread
-//      that manages timeouts.  We'd emulate pthread_cond_timedwait() by enqueuing
-//      a timeout request to the chron thread and then blocking via pthread_cond_wait().
-//      This also works well.  In fact it avoids kernel-level scalability impediments
-//      on certain platforms that don't handle lots of active pthread_cond_timedwait()
-//      timers in a graceful fashion.
-//
-// 4.   When the abstime value is in the past it appears that control returns
-//      correctly from pthread_cond_timedwait(), but the condvar is left corrupt.
-//      Subsequent timedwait/wait calls may hang indefinitely.  Given that, we
-//      can avoid the problem by reinitializing the condvar -- by cond_destroy()
-//      followed by cond_init() -- after all calls to pthread_cond_timedwait().
-//      It may be possible to avoid reinitialization by checking the return
-//      value from pthread_cond_timedwait().  In addition to reinitializing the
-//      condvar we must establish the invariant that cond_signal() is only called
-//      within critical sections protected by the adjunct mutex.  This prevents
-//      cond_signal() from "seeing" a condvar that's in the midst of being
-//      reinitialized or that is corrupt.  Sadly, this invariant obviates the
-//      desirable signal-after-unlock optimization that avoids futile context switching.
-//
-//      I'm also concerned that some versions of NTPL might allocate an auxilliary
-//      structure when a condvar is used or initialized.  cond_destroy()  would
-//      release the helper structure.  Our reinitialize-after-timedwait fix
-//      put excessive stress on malloc/free and locks protecting the c-heap.
-//
-// We currently use (4).  See the WorkAroundNTPLTimedWaitHang flag.
-// It may be possible to refine (4) by checking the kernel and NTPL verisons
-// and only enabling the work-around for vulnerable environments.
 
 // utility to compute the abstime argument to timedwait:
 // millis is the relative timeout time
@@ -5529,10 +5474,6 @@
 
   while (_Event < 0) {
     status = pthread_cond_timedwait(_cond, _mutex, &abst);
-    if (status != 0 && WorkAroundNPTLTimedWaitHang) {
-      pthread_cond_destroy(_cond);
-      pthread_cond_init(_cond, os::Linux::condAttr());
-    }
     assert_status(status == 0 || status == EINTR ||
                   status == ETIME || status == ETIMEDOUT,
                   status, "cond_timedwait");
@@ -5576,10 +5517,6 @@
   assert_status(status == 0, status, "mutex_lock");
   int AnyWaiters = _nParked;
   assert(AnyWaiters == 0 || AnyWaiters == 1, "invariant");
-  if (AnyWaiters != 0 && WorkAroundNPTLTimedWaitHang) {
-    AnyWaiters = 0;
-    pthread_cond_signal(_cond);
-  }
   status = pthread_mutex_unlock(_mutex);
   assert_status(status == 0, status, "mutex_unlock");
   if (AnyWaiters != 0) {
@@ -5731,7 +5668,7 @@
   if (_counter > 0)  { // no wait needed
     _counter = 0;
     status = pthread_mutex_unlock(_mutex);
-    assert(status == 0, "invariant");
+    assert_status(status == 0, status, "invariant");
     // Paranoia to ensure our locked and lock-free paths interact
     // correctly with each other and Java-level accesses.
     OrderAccess::fence();
@@ -5757,10 +5694,6 @@
   } else {
     _cur_index = isAbsolute ? ABS_INDEX : REL_INDEX;
     status = pthread_cond_timedwait(&_cond[_cur_index], _mutex, &absTime);
-    if (status != 0 && WorkAroundNPTLTimedWaitHang) {
-      pthread_cond_destroy(&_cond[_cur_index]);
-      pthread_cond_init(&_cond[_cur_index], isAbsolute ? NULL : os::Linux::condAttr());
-    }
   }
   _cur_index = -1;
   assert_status(status == 0 || status == EINTR ||
@@ -5786,33 +5719,17 @@
 
 void Parker::unpark() {
   int status = pthread_mutex_lock(_mutex);
-  assert(status == 0, "invariant");
+  assert_status(status == 0, status, "invariant");
   const int s = _counter;
   _counter = 1;
-  if (s < 1) {
-    // thread might be parked
-    if (_cur_index != -1) {
-      // thread is definitely parked
-      if (WorkAroundNPTLTimedWaitHang) {
-        status = pthread_cond_signal(&_cond[_cur_index]);
-        assert(status == 0, "invariant");
-        status = pthread_mutex_unlock(_mutex);
-        assert(status == 0, "invariant");
-      } else {
-        // must capture correct index before unlocking
-        int index = _cur_index;
-        status = pthread_mutex_unlock(_mutex);
-        assert(status == 0, "invariant");
-        status = pthread_cond_signal(&_cond[index]);
-        assert(status == 0, "invariant");
-      }
-    } else {
-      pthread_mutex_unlock(_mutex);
-      assert(status == 0, "invariant");
-    }
-  } else {
-    pthread_mutex_unlock(_mutex);
-    assert(status == 0, "invariant");
+  // must capture correct index before unlocking
+  int index = _cur_index;
+  status = pthread_mutex_unlock(_mutex);
+  assert_status(status == 0, status, "invariant");
+  if (s < 1 && _cur_index != -1) {
+    // thread is definitely parked
+    status = pthread_cond_signal(&_cond[index]);
+    assert_status(status == 0, status, "invariant");
   }
 }
 
--- a/hotspot/src/share/vm/runtime/globals.hpp	Wed Feb 10 14:30:25 2016 +0100
+++ b/hotspot/src/share/vm/runtime/globals.hpp	Wed Feb 10 18:57:19 2016 -0500
@@ -1279,10 +1279,6 @@
   experimental(intx, hashCode, 5,                                           \
                "(Unstable) select hashCode generation algorithm")           \
                                                                             \
-  experimental(intx, WorkAroundNPTLTimedWaitHang, 0,                        \
-               "(Unstable, Linux-specific) "                                \
-               "avoid NPTL-FUTEX hang pthread_cond_timedwait")              \
-                                                                            \
   product(bool, FilterSpuriousWakeups, true,                                \
           "When true prevents OS-level spurious, or premature, wakeups "    \
           "from Object.wait (Ignored for Windows)")                         \