8075171: Contended Locking fast notify bucket
authordcubed
Tue, 21 Jul 2015 07:28:37 -0700
changeset 31856 614d6786ba55
parent 31855 1550546a6e8e
child 31867 5d39059535ba
child 31963 641ed52732ec
child 31964 d34ad1715d96
child 31981 9caa094a485f
child 31985 f63ab3cbd931
8075171: Contended Locking fast notify bucket Summary: JEP-143/JDK-8073165 Contended Locking fast notify bucket Reviewed-by: dholmes, acorn, dice, dcubed Contributed-by: dave.dice@oracle.com, karen.kinnear@oracle.com, daniel.daugherty@oracle.com
hotspot/src/share/vm/classfile/vmSymbols.hpp
hotspot/src/share/vm/opto/library_call.cpp
hotspot/src/share/vm/opto/runtime.cpp
hotspot/src/share/vm/opto/runtime.hpp
hotspot/src/share/vm/runtime/globals.hpp
hotspot/src/share/vm/runtime/objectMonitor.cpp
hotspot/src/share/vm/runtime/objectMonitor.hpp
hotspot/src/share/vm/runtime/synchronizer.cpp
hotspot/src/share/vm/runtime/synchronizer.hpp
hotspot/test/compiler/dependencies/MonomorphicObjectCall/java/lang/Object.java
--- a/hotspot/src/share/vm/classfile/vmSymbols.hpp	Sat Jul 18 04:53:32 2015 +0200
+++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp	Tue Jul 21 07:28:37 2015 -0700
@@ -679,6 +679,10 @@
    do_name(     getClass_name,                                   "getClass")                                            \
   do_intrinsic(_clone,                    java_lang_Object,       clone_name, void_object_signature,             F_R)   \
    do_name(     clone_name,                                      "clone")                                               \
+  do_intrinsic(_notify,                   java_lang_Object,       notify_name, void_method_signature,            F_R)   \
+   do_name(     notify_name,                                     "notify")                                              \
+  do_intrinsic(_notifyAll,                java_lang_Object,       notifyAll_name, void_method_signature,         F_R)   \
+   do_name(     notifyAll_name,                                  "notifyAll")                                           \
                                                                                                                         \
   /* Math & StrictMath intrinsics are defined in terms of just a few signatures: */                                     \
   do_class(java_lang_Math,                "java/lang/Math")                                                             \
--- a/hotspot/src/share/vm/opto/library_call.cpp	Sat Jul 18 04:53:32 2015 +0200
+++ b/hotspot/src/share/vm/opto/library_call.cpp	Tue Jul 21 07:28:37 2015 -0700
@@ -225,6 +225,7 @@
   bool inline_pow();
   Node* finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName);
   bool inline_min_max(vmIntrinsics::ID id);
+  bool inline_notify(vmIntrinsics::ID id);
   Node* generate_min_max(vmIntrinsics::ID id, Node* x, Node* y);
   // This returns Type::AnyPtr, RawPtr, or OopPtr.
   int classify_unsafe_addr(Node* &base, Node* &offset);
@@ -776,6 +777,13 @@
   case vmIntrinsics::_min:
   case vmIntrinsics::_max:                      return inline_min_max(intrinsic_id());
 
+  case vmIntrinsics::_notify:
+  case vmIntrinsics::_notifyAll:
+    if (InlineNotify) {
+      return inline_notify(intrinsic_id());
+    }
+    return false;
+
   case vmIntrinsics::_addExactI:                return inline_math_addExactI(false /* add */);
   case vmIntrinsics::_addExactL:                return inline_math_addExactL(false /* add */);
   case vmIntrinsics::_decrementExactI:          return inline_math_subtractExactI(true /* decrement */);
@@ -2075,6 +2083,21 @@
           );
 }
 
+//----------------------------inline_notify-----------------------------------*
+bool LibraryCallKit::inline_notify(vmIntrinsics::ID id) {
+  const TypeFunc* ftype = OptoRuntime::monitor_notify_Type();
+  address func;
+  if (id == vmIntrinsics::_notify) {
+    func = OptoRuntime::monitor_notify_Java();
+  } else {
+    func = OptoRuntime::monitor_notifyAll_Java();
+  }
+  Node* call = make_runtime_call(RC_NO_LEAF, ftype, func, NULL, TypeRawPtr::BOTTOM, argument(0));
+  make_slow_call_ex(call, env()->Throwable_klass(), false);
+  return true;
+}
+
+
 //----------------------------inline_min_max-----------------------------------
 bool LibraryCallKit::inline_min_max(vmIntrinsics::ID id) {
   set_result(generate_min_max(id, argument(0), argument(1)));
--- a/hotspot/src/share/vm/opto/runtime.cpp	Sat Jul 18 04:53:32 2015 +0200
+++ b/hotspot/src/share/vm/opto/runtime.cpp	Tue Jul 21 07:28:37 2015 -0700
@@ -96,6 +96,8 @@
 address OptoRuntime::_g1_wb_post_Java                             = NULL;
 address OptoRuntime::_vtable_must_compile_Java                    = NULL;
 address OptoRuntime::_complete_monitor_locking_Java               = NULL;
+address OptoRuntime::_monitor_notify_Java                         = NULL;
+address OptoRuntime::_monitor_notifyAll_Java                      = NULL;
 address OptoRuntime::_rethrow_Java                                = NULL;
 
 address OptoRuntime::_slow_arraycopy_Java                         = NULL;
@@ -144,6 +146,8 @@
   gen(env, _g1_wb_pre_Java                 , g1_wb_pre_Type               , SharedRuntime::g1_wb_pre        ,    0 , false, false, false);
   gen(env, _g1_wb_post_Java                , g1_wb_post_Type              , SharedRuntime::g1_wb_post       ,    0 , false, false, false);
   gen(env, _complete_monitor_locking_Java  , complete_monitor_enter_Type  , SharedRuntime::complete_monitor_locking_C, 0, false, false, false);
+  gen(env, _monitor_notify_Java            , monitor_notify_Type          , monitor_notify_C                ,    0 , false, false, false);
+  gen(env, _monitor_notifyAll_Java         , monitor_notify_Type          , monitor_notifyAll_C             ,    0 , false, false, false);
   gen(env, _rethrow_Java                   , rethrow_Type                 , rethrow_C                       ,    2 , true , false, true );
 
   gen(env, _slow_arraycopy_Java            , slow_arraycopy_Type          , SharedRuntime::slow_arraycopy_C ,    0 , false, false, false);
@@ -426,6 +430,45 @@
   thread->set_vm_result(obj);
 JRT_END
 
+JRT_BLOCK_ENTRY(void, OptoRuntime::monitor_notify_C(oopDesc* obj, JavaThread *thread))
+
+  // Very few notify/notifyAll operations find any threads on the waitset, so
+  // the dominant fast-path is to simply return.
+  // Relatedly, it's critical that notify/notifyAll be fast in order to
+  // reduce lock hold times.
+  if (!SafepointSynchronize::is_synchronizing()) {
+    if (ObjectSynchronizer::quick_notify(obj, thread, false)) {
+      return;
+    }
+  }
+
+  // This is the case the fast-path above isn't provisioned to handle.
+  // The fast-path is designed to handle frequently arising cases in an efficient manner.
+  // (The fast-path is just a degenerate variant of the slow-path).
+  // Perform the dreaded state transition and pass control into the slow-path.
+  JRT_BLOCK;
+  Handle h_obj(THREAD, obj);
+  ObjectSynchronizer::notify(h_obj, CHECK);
+  JRT_BLOCK_END;
+JRT_END
+
+JRT_BLOCK_ENTRY(void, OptoRuntime::monitor_notifyAll_C(oopDesc* obj, JavaThread *thread))
+
+  if (!SafepointSynchronize::is_synchronizing() ) {
+    if (ObjectSynchronizer::quick_notify(obj, thread, true)) {
+      return;
+    }
+  }
+
+  // This is the case the fast-path above isn't provisioned to handle.
+  // The fast-path is designed to handle frequently arising cases in an efficient manner.
+  // (The fast-path is just a degenerate variant of the slow-path).
+  // Perform the dreaded state transition and pass control into the slow-path.
+  JRT_BLOCK;
+  Handle h_obj(THREAD, obj);
+  ObjectSynchronizer::notifyall(h_obj, CHECK);
+  JRT_BLOCK_END;
+JRT_END
 
 const TypeFunc *OptoRuntime::new_instance_Type() {
   // create input type (domain)
@@ -604,14 +647,26 @@
   fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL;  // Object to be Locked
   fields[TypeFunc::Parms+1] = TypeRawPtr::BOTTOM;    // Address of stack location for lock - BasicLock
   fields[TypeFunc::Parms+2] = TypeRawPtr::BOTTOM;    // Thread pointer (Self)
-  const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+3,fields);
+  const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+3, fields);
 
   // create result type (range)
   fields = TypeTuple::fields(0);
 
-  const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0,fields);
+  const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields);
+
+  return TypeFunc::make(domain, range);
+}
 
-  return TypeFunc::make(domain,range);
+const TypeFunc *OptoRuntime::monitor_notify_Type() {
+  // create input type (domain)
+  const Type **fields = TypeTuple::fields(1);
+  fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL;  // Object to be Locked
+  const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+1, fields);
+
+  // create result type (range)
+  fields = TypeTuple::fields(0);
+  const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields);
+  return TypeFunc::make(domain, range);
 }
 
 const TypeFunc* OptoRuntime::flush_windows_Type() {
--- a/hotspot/src/share/vm/opto/runtime.hpp	Sat Jul 18 04:53:32 2015 +0200
+++ b/hotspot/src/share/vm/opto/runtime.hpp	Tue Jul 21 07:28:37 2015 -0700
@@ -146,6 +146,8 @@
   static address _vtable_must_compile_Java;
   static address _complete_monitor_locking_Java;
   static address _rethrow_Java;
+  static address _monitor_notify_Java;
+  static address _monitor_notifyAll_Java;
 
   static address _slow_arraycopy_Java;
   static address _register_finalizer_Java;
@@ -186,6 +188,9 @@
   static void complete_monitor_locking_C(oopDesc* obj, BasicLock* lock, JavaThread* thread);
   static void complete_monitor_unlocking_C(oopDesc* obj, BasicLock* lock, JavaThread* thread);
 
+  static void monitor_notify_C(oopDesc* obj, JavaThread* thread);
+  static void monitor_notifyAll_C(oopDesc* obj, JavaThread* thread);
+
 private:
 
   // Implicit exception support
@@ -244,7 +249,9 @@
   static address g1_wb_pre_Java()                        { return _g1_wb_pre_Java; }
   static address g1_wb_post_Java()                       { return _g1_wb_post_Java; }
   static address vtable_must_compile_stub()              { return _vtable_must_compile_Java; }
-  static address complete_monitor_locking_Java()         { return _complete_monitor_locking_Java;   }
+  static address complete_monitor_locking_Java()         { return _complete_monitor_locking_Java; }
+  static address monitor_notify_Java()                   { return _monitor_notify_Java; }
+  static address monitor_notifyAll_Java()                { return _monitor_notifyAll_Java; }
 
   static address slow_arraycopy_Java()                   { return _slow_arraycopy_Java; }
   static address register_finalizer_Java()               { return _register_finalizer_Java; }
@@ -285,6 +292,7 @@
   static const TypeFunc* g1_wb_post_Type();
   static const TypeFunc* complete_monitor_enter_Type();
   static const TypeFunc* complete_monitor_exit_Type();
+  static const TypeFunc* monitor_notify_Type();
   static const TypeFunc* uncommon_trap_Type();
   static const TypeFunc* athrow_Type();
   static const TypeFunc* rethrow_Type();
--- a/hotspot/src/share/vm/runtime/globals.hpp	Sat Jul 18 04:53:32 2015 +0200
+++ b/hotspot/src/share/vm/runtime/globals.hpp	Tue Jul 21 07:28:37 2015 -0700
@@ -1286,6 +1286,8 @@
                                                                             \
   experimental(intx, SyncVerbose, 0, "(Unstable)")                          \
                                                                             \
+  product(bool, InlineNotify, true, "intrinsify subset of notify" )         \
+                                                                            \
   experimental(intx, ClearFPUAtPark, 0, "(Unsafe, Unstable)")               \
                                                                             \
   experimental(intx, hashCode, 5,                                           \
--- a/hotspot/src/share/vm/runtime/objectMonitor.cpp	Sat Jul 18 04:53:32 2015 +0200
+++ b/hotspot/src/share/vm/runtime/objectMonitor.cpp	Tue Jul 21 07:28:37 2015 -0700
@@ -430,6 +430,8 @@
   return -1;
 }
 
+#define MAX_RECHECK_INTERVAL 1000
+
 void NOINLINE ObjectMonitor::EnterI(TRAPS) {
   Thread * const Self = THREAD;
   assert(Self->is_Java_thread(), "invariant");
@@ -539,7 +541,7 @@
 
   TEVENT(Inflated enter - Contention);
   int nWakeups = 0;
-  int RecheckInterval = 1;
+  int recheckInterval = 1;
 
   for (;;) {
 
@@ -553,10 +555,12 @@
     // park self
     if (_Responsible == Self || (SyncFlags & 1)) {
       TEVENT(Inflated enter - park TIMED);
-      Self->_ParkEvent->park((jlong) RecheckInterval);
-      // Increase the RecheckInterval, but clamp the value.
-      RecheckInterval *= 8;
-      if (RecheckInterval > 1000) RecheckInterval = 1000;
+      Self->_ParkEvent->park((jlong) recheckInterval);
+      // Increase the recheckInterval, but clamp the value.
+      recheckInterval *= 8;
+      if (recheckInterval > MAX_RECHECK_INTERVAL) {
+        recheckInterval = MAX_RECHECK_INTERVAL;
+      }
     } else {
       TEVENT(Inflated enter - park UNTIMED);
       Self->_ParkEvent->park();
@@ -709,7 +713,7 @@
       // or java_suspend_self()
       jt->set_suspend_equivalent();
       if (SyncFlags & 1) {
-        Self->_ParkEvent->park((jlong)1000);
+        Self->_ParkEvent->park((jlong)MAX_RECHECK_INTERVAL);
       } else {
         Self->_ParkEvent->park();
       }
@@ -1636,15 +1640,8 @@
 // then instead of transferring a thread from the WaitSet to the EntryList
 // we might just dequeue a thread from the WaitSet and directly unpark() it.
 
-void ObjectMonitor::notify(TRAPS) {
-  CHECK_OWNER();
-  if (_WaitSet == NULL) {
-    TEVENT(Empty-Notify);
-    return;
-  }
-  DTRACE_MONITOR_PROBE(notify, this, object(), THREAD);
-
-  int Policy = Knob_MoveNotifyee;
+void ObjectMonitor::INotify(Thread * Self) {
+  const int policy = Knob_MoveNotifyee;
 
   Thread::SpinAcquire(&_WaitSetLock, "WaitSet - notify");
   ObjectWaiter * iterator = DequeueWaiter();
@@ -1652,74 +1649,76 @@
     TEVENT(Notify1 - Transfer);
     guarantee(iterator->TState == ObjectWaiter::TS_WAIT, "invariant");
     guarantee(iterator->_notified == 0, "invariant");
-    if (Policy != 4) {
+    // Disposition - what might we do with iterator ?
+    // a.  add it directly to the EntryList - either tail (policy == 1)
+    //     or head (policy == 0).
+    // b.  push it onto the front of the _cxq (policy == 2).
+    // For now we use (b).
+    if (policy != 4) {
       iterator->TState = ObjectWaiter::TS_ENTER;
     }
     iterator->_notified = 1;
-    Thread * Self = THREAD;
     iterator->_notifier_tid = Self->osthread()->thread_id();
 
-    ObjectWaiter * List = _EntryList;
-    if (List != NULL) {
-      assert(List->_prev == NULL, "invariant");
-      assert(List->TState == ObjectWaiter::TS_ENTER, "invariant");
-      assert(List != iterator, "invariant");
+    ObjectWaiter * list = _EntryList;
+    if (list != NULL) {
+      assert(list->_prev == NULL, "invariant");
+      assert(list->TState == ObjectWaiter::TS_ENTER, "invariant");
+      assert(list != iterator, "invariant");
     }
 
-    if (Policy == 0) {       // prepend to EntryList
-      if (List == NULL) {
+    if (policy == 0) {       // prepend to EntryList
+      if (list == NULL) {
         iterator->_next = iterator->_prev = NULL;
         _EntryList = iterator;
       } else {
-        List->_prev = iterator;
-        iterator->_next = List;
+        list->_prev = iterator;
+        iterator->_next = list;
         iterator->_prev = NULL;
         _EntryList = iterator;
       }
-    } else if (Policy == 1) {      // append to EntryList
-      if (List == NULL) {
+    } else if (policy == 1) {      // append to EntryList
+      if (list == NULL) {
         iterator->_next = iterator->_prev = NULL;
         _EntryList = iterator;
       } else {
         // CONSIDER:  finding the tail currently requires a linear-time walk of
         // the EntryList.  We can make tail access constant-time by converting to
         // a CDLL instead of using our current DLL.
-        ObjectWaiter * Tail;
-        for (Tail = List; Tail->_next != NULL; Tail = Tail->_next) /* empty */;
-        assert(Tail != NULL && Tail->_next == NULL, "invariant");
-        Tail->_next = iterator;
-        iterator->_prev = Tail;
+        ObjectWaiter * tail;
+        for (tail = list; tail->_next != NULL; tail = tail->_next) /* empty */;
+        assert(tail != NULL && tail->_next == NULL, "invariant");
+        tail->_next = iterator;
+        iterator->_prev = tail;
         iterator->_next = NULL;
       }
-    } else if (Policy == 2) {      // prepend to cxq
-      // prepend to cxq
-      if (List == NULL) {
+    } else if (policy == 2) {      // prepend to cxq
+      if (list == NULL) {
         iterator->_next = iterator->_prev = NULL;
         _EntryList = iterator;
       } else {
         iterator->TState = ObjectWaiter::TS_CXQ;
         for (;;) {
-          ObjectWaiter * Front = _cxq;
-          iterator->_next = Front;
-          if (Atomic::cmpxchg_ptr (iterator, &_cxq, Front) == Front) {
+          ObjectWaiter * front = _cxq;
+          iterator->_next = front;
+          if (Atomic::cmpxchg_ptr(iterator, &_cxq, front) == front) {
             break;
           }
         }
       }
-    } else if (Policy == 3) {      // append to cxq
+    } else if (policy == 3) {      // append to cxq
       iterator->TState = ObjectWaiter::TS_CXQ;
       for (;;) {
-        ObjectWaiter * Tail;
-        Tail = _cxq;
-        if (Tail == NULL) {
+        ObjectWaiter * tail = _cxq;
+        if (tail == NULL) {
           iterator->_next = NULL;
-          if (Atomic::cmpxchg_ptr (iterator, &_cxq, NULL) == NULL) {
+          if (Atomic::cmpxchg_ptr(iterator, &_cxq, NULL) == NULL) {
             break;
           }
         } else {
-          while (Tail->_next != NULL) Tail = Tail->_next;
-          Tail->_next = iterator;
-          iterator->_prev = Tail;
+          while (tail->_next != NULL) tail = tail->_next;
+          tail->_next = iterator;
+          iterator->_prev = tail;
           iterator->_next = NULL;
           break;
         }
@@ -1731,10 +1730,6 @@
       ev->unpark();
     }
 
-    if (Policy < 4) {
-      iterator->wait_reenter_begin(this);
-    }
-
     // _WaitSetLock protects the wait queue, not the EntryList.  We could
     // move the add-to-EntryList operation, above, outside the critical section
     // protected by _WaitSetLock.  In practice that's not useful.  With the
@@ -1742,133 +1737,63 @@
     // is the only thread that grabs _WaitSetLock.  There's almost no contention
     // on _WaitSetLock so it's not profitable to reduce the length of the
     // critical section.
+
+    if (policy < 4) {
+      iterator->wait_reenter_begin(this);
+    }
   }
-
   Thread::SpinRelease(&_WaitSetLock);
+}
 
-  if (iterator != NULL && ObjectMonitor::_sync_Notifications != NULL) {
-    ObjectMonitor::_sync_Notifications->inc();
+// Consider: a not-uncommon synchronization bug is to use notify() when
+// notifyAll() is more appropriate, potentially resulting in stranded
+// threads; this is one example of a lost wakeup. A useful diagnostic
+// option is to force all notify() operations to behave as notifyAll().
+//
+// Note: We can also detect many such problems with a "minimum wait".
+// When the "minimum wait" is set to a small non-zero timeout value
+// and the program does not hang whereas it did absent "minimum wait",
+// that suggests a lost wakeup bug. The '-XX:SyncFlags=1' option uses
+// a "minimum wait" for all park() operations; see the recheckInterval
+// variable and MAX_RECHECK_INTERVAL.
+
+void ObjectMonitor::notify(TRAPS) {
+  CHECK_OWNER();
+  if (_WaitSet == NULL) {
+    TEVENT(Empty-Notify);
+    return;
+  }
+  DTRACE_MONITOR_PROBE(notify, this, object(), THREAD);
+  INotify(THREAD);
+  if (ObjectMonitor::_sync_Notifications != NULL) {
+    ObjectMonitor::_sync_Notifications->inc(1);
   }
 }
 
 
+// The current implementation of notifyAll() transfers the waiters one-at-a-time
+// from the waitset to the EntryList. This could be done more efficiently with a
+// single bulk transfer but in practice it's not time-critical. Beware too,
+// that in prepend-mode we invert the order of the waiters. Let's say that the
+// waitset is "ABCD" and the EntryList is "XYZ". After a notifyAll() in prepend
+// mode the waitset will be empty and the EntryList will be "DCBAXYZ".
+
 void ObjectMonitor::notifyAll(TRAPS) {
   CHECK_OWNER();
-  ObjectWaiter* iterator;
   if (_WaitSet == NULL) {
     TEVENT(Empty-NotifyAll);
     return;
   }
+
   DTRACE_MONITOR_PROBE(notifyAll, this, object(), THREAD);
-
-  int Policy = Knob_MoveNotifyee;
-  int Tally = 0;
-  Thread::SpinAcquire(&_WaitSetLock, "WaitSet - notifyall");
-
-  for (;;) {
-    iterator = DequeueWaiter();
-    if (iterator == NULL) break;
-    TEVENT(NotifyAll - Transfer1);
-    ++Tally;
-
-    // Disposition - what might we do with iterator ?
-    // a.  add it directly to the EntryList - either tail or head.
-    // b.  push it onto the front of the _cxq.
-    // For now we use (a).
-
-    guarantee(iterator->TState == ObjectWaiter::TS_WAIT, "invariant");
-    guarantee(iterator->_notified == 0, "invariant");
-    iterator->_notified = 1;
-    Thread * Self = THREAD;
-    iterator->_notifier_tid = Self->osthread()->thread_id();
-    if (Policy != 4) {
-      iterator->TState = ObjectWaiter::TS_ENTER;
-    }
-
-    ObjectWaiter * List = _EntryList;
-    if (List != NULL) {
-      assert(List->_prev == NULL, "invariant");
-      assert(List->TState == ObjectWaiter::TS_ENTER, "invariant");
-      assert(List != iterator, "invariant");
-    }
-
-    if (Policy == 0) {       // prepend to EntryList
-      if (List == NULL) {
-        iterator->_next = iterator->_prev = NULL;
-        _EntryList = iterator;
-      } else {
-        List->_prev = iterator;
-        iterator->_next = List;
-        iterator->_prev = NULL;
-        _EntryList = iterator;
-      }
-    } else if (Policy == 1) {      // append to EntryList
-      if (List == NULL) {
-        iterator->_next = iterator->_prev = NULL;
-        _EntryList = iterator;
-      } else {
-        // CONSIDER:  finding the tail currently requires a linear-time walk of
-        // the EntryList.  We can make tail access constant-time by converting to
-        // a CDLL instead of using our current DLL.
-        ObjectWaiter * Tail;
-        for (Tail = List; Tail->_next != NULL; Tail = Tail->_next) /* empty */;
-        assert(Tail != NULL && Tail->_next == NULL, "invariant");
-        Tail->_next = iterator;
-        iterator->_prev = Tail;
-        iterator->_next = NULL;
-      }
-    } else if (Policy == 2) {      // prepend to cxq
-      // prepend to cxq
-      iterator->TState = ObjectWaiter::TS_CXQ;
-      for (;;) {
-        ObjectWaiter * Front = _cxq;
-        iterator->_next = Front;
-        if (Atomic::cmpxchg_ptr (iterator, &_cxq, Front) == Front) {
-          break;
-        }
-      }
-    } else if (Policy == 3) {      // append to cxq
-      iterator->TState = ObjectWaiter::TS_CXQ;
-      for (;;) {
-        ObjectWaiter * Tail;
-        Tail = _cxq;
-        if (Tail == NULL) {
-          iterator->_next = NULL;
-          if (Atomic::cmpxchg_ptr (iterator, &_cxq, NULL) == NULL) {
-            break;
-          }
-        } else {
-          while (Tail->_next != NULL) Tail = Tail->_next;
-          Tail->_next = iterator;
-          iterator->_prev = Tail;
-          iterator->_next = NULL;
-          break;
-        }
-      }
-    } else {
-      ParkEvent * ev = iterator->_event;
-      iterator->TState = ObjectWaiter::TS_RUN;
-      OrderAccess::fence();
-      ev->unpark();
-    }
-
-    if (Policy < 4) {
-      iterator->wait_reenter_begin(this);
-    }
-
-    // _WaitSetLock protects the wait queue, not the EntryList.  We could
-    // move the add-to-EntryList operation, above, outside the critical section
-    // protected by _WaitSetLock.  In practice that's not useful.  With the
-    // exception of  wait() timeouts and interrupts the monitor owner
-    // is the only thread that grabs _WaitSetLock.  There's almost no contention
-    // on _WaitSetLock so it's not profitable to reduce the length of the
-    // critical section.
+  int tally = 0;
+  while (_WaitSet != NULL) {
+    tally++;
+    INotify(THREAD);
   }
 
-  Thread::SpinRelease(&_WaitSetLock);
-
-  if (Tally != 0 && ObjectMonitor::_sync_Notifications != NULL) {
-    ObjectMonitor::_sync_Notifications->inc(Tally);
+  if (tally != 0 && ObjectMonitor::_sync_Notifications != NULL) {
+    ObjectMonitor::_sync_Notifications->inc(tally);
   }
 }
 
--- a/hotspot/src/share/vm/runtime/objectMonitor.hpp	Sat Jul 18 04:53:32 2015 +0200
+++ b/hotspot/src/share/vm/runtime/objectMonitor.hpp	Tue Jul 21 07:28:37 2015 -0700
@@ -332,7 +332,7 @@
  private:
   void      AddWaiter(ObjectWaiter * waiter);
   static    void DeferredInitialize();
-
+  void      INotify(Thread * Self);
   ObjectWaiter * DequeueWaiter();
   void      DequeueSpecificWaiter(ObjectWaiter * waiter);
   void      EnterI(TRAPS);
--- a/hotspot/src/share/vm/runtime/synchronizer.cpp	Sat Jul 18 04:53:32 2015 +0200
+++ b/hotspot/src/share/vm/runtime/synchronizer.cpp	Tue Jul 21 07:28:37 2015 -0700
@@ -86,6 +86,8 @@
     }                                                                      \
   }
 
+#define HOTSPOT_MONITOR_PROBE_notify HOTSPOT_MONITOR_NOTIFY
+#define HOTSPOT_MONITOR_PROBE_notifyAll HOTSPOT_MONITOR_NOTIFYALL
 #define HOTSPOT_MONITOR_PROBE_waited HOTSPOT_MONITOR_WAITED
 
 #define DTRACE_MONITOR_PROBE(probe, monitor, obj, thread)                  \
@@ -146,6 +148,58 @@
 // operators: safepoints or indefinite blocking (blocking that might span a
 // safepoint) are forbidden. Generally the thread_state() is _in_Java upon
 // entry.
+//
+// Consider: An interesting optimization is to have the JIT recognize the
+// following common idiom:
+//   synchronized (someobj) { .... ; notify(); }
+// That is, we find a notify() or notifyAll() call that immediately precedes
+// the monitorexit operation.  In that case the JIT could fuse the operations
+// into a single notifyAndExit() runtime primitive.
+
+bool ObjectSynchronizer::quick_notify(oopDesc * obj, Thread * self, bool all) {
+  assert(!SafepointSynchronize::is_at_safepoint(), "invariant");
+  assert(self->is_Java_thread(), "invariant");
+  assert(((JavaThread *) self)->thread_state() == _thread_in_Java, "invariant");
+  No_Safepoint_Verifier nsv;
+  if (obj == NULL) return false;  // slow-path for invalid obj
+  const markOop mark = obj->mark();
+
+  if (mark->has_locker() && self->is_lock_owned((address)mark->locker())) {
+    // Degenerate notify
+    // stack-locked by caller so by definition the implied waitset is empty.
+    return true;
+  }
+
+  if (mark->has_monitor()) {
+    ObjectMonitor * const mon = mark->monitor();
+    assert(mon->object() == obj, "invariant");
+    if (mon->owner() != self) return false;  // slow-path for IMS exception
+
+    if (mon->first_waiter() != NULL) {
+      // We have one or more waiters. Since this is an inflated monitor
+      // that we own, we can transfer one or more threads from the waitset
+      // to the entrylist here and now, avoiding the slow-path.
+      if (all) {
+        DTRACE_MONITOR_PROBE(notifyAll, mon, obj, self);
+      } else {
+        DTRACE_MONITOR_PROBE(notify, mon, obj, self);
+      }
+      int tally = 0;
+      do {
+        mon->INotify(self);
+        ++tally;
+      } while (mon->first_waiter() != NULL && all);
+      if (ObjectMonitor::_sync_Notifications != NULL) {
+        ObjectMonitor::_sync_Notifications->inc(tally);
+      }
+    }
+    return true;
+  }
+
+  // biased locking and any other IMS exception states take the slow-path
+  return false;
+}
+
 
 // The LockNode emitted directly at the synchronization site would have
 // been too big if it were to have included support for the cases of inflated
@@ -1451,8 +1505,7 @@
 // This is an unfortunate aspect of this design.
 
 enum ManifestConstants {
-  ClearResponsibleAtSTW   = 0,
-  MaximumRecheckInterval  = 1000
+  ClearResponsibleAtSTW = 0
 };
 
 // Deflate a single monitor if not in-use
--- a/hotspot/src/share/vm/runtime/synchronizer.hpp	Sat Jul 18 04:53:32 2015 +0200
+++ b/hotspot/src/share/vm/runtime/synchronizer.hpp	Tue Jul 21 07:28:37 2015 -0700
@@ -72,6 +72,7 @@
   static void notify(Handle obj, TRAPS);
   static void notifyall(Handle obj, TRAPS);
 
+  static bool quick_notify(oopDesc* obj, Thread* Self, bool All);
   static bool quick_enter(oop obj, Thread* Self, BasicLock* Lock);
 
   // Special internal-use-only method for use by JVM infrastructure
--- a/hotspot/test/compiler/dependencies/MonomorphicObjectCall/java/lang/Object.java	Sat Jul 18 04:53:32 2015 +0200
+++ b/hotspot/test/compiler/dependencies/MonomorphicObjectCall/java/lang/Object.java	Tue Jul 21 07:28:37 2015 -0700
@@ -58,8 +58,10 @@
         return getClass().getName() + "@" + Integer.toHexString(hashCode());
     }
 
+    @HotSpotIntrinsicCandidate
     public final native void notify();
 
+    @HotSpotIntrinsicCandidate
     public final native void notifyAll();
 
     public final native void wait(long timeout) throws InterruptedException;