8212827: GlobalCounter should support nested critical sections
author kbarrett
Tue, 30 Oct 2018 18:06:35 -0400
changeset 52332 d2a3503c72f7
parent 52331 8d8702585652
child 52333 c401c536cea1
8212827: GlobalCounter should support nested critical sections
Summary: Support nested critical sections.
Reviewed-by: eosterlund, rehn, tschatzl
src/hotspot/share/utilities/concurrentHashTable.hpp
src/hotspot/share/utilities/concurrentHashTable.inline.hpp
src/hotspot/share/utilities/globalCounter.cpp
src/hotspot/share/utilities/globalCounter.hpp
src/hotspot/share/utilities/globalCounter.inline.hpp
src/hotspot/share/utilities/singleWriterSynchronizer.hpp
test/hotspot/gtest/utilities/test_globalCounter.cpp
test/hotspot/gtest/utilities/test_globalCounter_nested.cpp
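
A minimal usage sketch of what this change enables (illustrative only, not part of the changeset; assumes HotSpot-internal context for Thread and GlobalCounter): read-side critical sections may now be properly nested, via either the scoped helper shown here or the explicit begin/end API changed below.

    #include "utilities/globalCounter.inline.hpp"

    void reader_example(Thread* self) {
      GlobalCounter::CriticalSection outer(self);    // enter read-side critical section
      {
        GlobalCounter::CriticalSection inner(self);  // nesting is now supported
        // ... wait-free reads of the protected data ...
      }                                              // inner exit restores the outer state
      // ... still safely inside the outer critical section ...
    }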
--- a/src/hotspot/share/utilities/concurrentHashTable.hpp	Tue Oct 30 14:38:59 2018 -0700
+++ b/src/hotspot/share/utilities/concurrentHashTable.hpp	Tue Oct 30 18:06:35 2018 -0400
@@ -25,6 +25,10 @@
 #ifndef SHARE_UTILITIES_CONCURRENT_HASH_TABLE_HPP
 #define SHARE_UTILITIES_CONCURRENT_HASH_TABLE_HPP
 
+#include "memory/allocation.hpp"
+#include "utilities/globalCounter.hpp"
+#include "utilities/globalDefinitions.hpp"
+
 // A mostly concurrent-hash-table where the read-side is wait-free, inserts are
 // CAS and deletes mutual exclude each other on per bucket-basis. VALUE is the
 // type kept inside each Node and CONFIG contains hash and allocation methods.
@@ -247,6 +251,7 @@
    protected:
     Thread* _thread;
     ConcurrentHashTable<VALUE, CONFIG, F>* _cht;
+    GlobalCounter::CSContext _cs_context;
    public:
     ScopedCS(Thread* thread, ConcurrentHashTable<VALUE, CONFIG, F>* cht);
     ~ScopedCS();
--- a/src/hotspot/share/utilities/concurrentHashTable.inline.hpp	Tue Oct 30 14:38:59 2018 -0700
+++ b/src/hotspot/share/utilities/concurrentHashTable.inline.hpp	Tue Oct 30 18:06:35 2018 -0400
@@ -208,9 +208,10 @@
 template <typename VALUE, typename CONFIG, MEMFLAGS F>
 inline ConcurrentHashTable<VALUE, CONFIG, F>::
   ScopedCS::ScopedCS(Thread* thread, ConcurrentHashTable<VALUE, CONFIG, F>* cht)
-    : _thread(thread), _cht(cht)
+    : _thread(thread),
+      _cht(cht),
+      _cs_context(GlobalCounter::critical_section_begin(_thread))
 {
-  GlobalCounter::critical_section_begin(_thread);
   // This version is published now.
   if (OrderAccess::load_acquire(&_cht->_invisible_epoch) != NULL) {
     OrderAccess::release_store_fence(&_cht->_invisible_epoch, (Thread*)NULL);
@@ -221,7 +222,7 @@
 inline ConcurrentHashTable<VALUE, CONFIG, F>::
   ScopedCS::~ScopedCS()
 {
-  GlobalCounter::critical_section_end(_thread);
+  GlobalCounter::critical_section_end(_thread, _cs_context);
 }
 
 // BaseConfig
@@ -502,7 +503,7 @@
   // concurrent single deletes. The _invisible_epoch can only be used by the
   // owner of _resize_lock, which is us here. Therefore we should not change
   // it in our own read-side.
-  GlobalCounter::critical_section_begin(thread);
+  GlobalCounter::CSContext cs_context = GlobalCounter::critical_section_begin(thread);
   for (size_t bucket_it = start_idx; bucket_it < stop_idx; bucket_it++) {
     Bucket* bucket = table->get_bucket(bucket_it);
     Bucket* prefetch_bucket = (bucket_it+1) < stop_idx ?
@@ -514,7 +515,7 @@
         continue;
     }
 
-    GlobalCounter::critical_section_end(thread);
+    GlobalCounter::critical_section_end(thread, cs_context);
     // We left the critical section but the bucket cannot be removed while we hold
     // the _resize_lock.
     bucket->lock();
@@ -530,9 +531,9 @@
       Node::destroy_node(ndel[node_it]);
       DEBUG_ONLY(ndel[node_it] = (Node*)POISON_PTR;)
     }
-    GlobalCounter::critical_section_begin(thread);
+    cs_context = GlobalCounter::critical_section_begin(thread);
   }
-  GlobalCounter::critical_section_end(thread);
+  GlobalCounter::critical_section_end(thread, cs_context);
 }
 
 template <typename VALUE, typename CONFIG, MEMFLAGS F>
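
The bulk-delete hunks above illustrate the new contract: each critical_section_begin returns a CSContext that must reach the matching critical_section_end, even when the section is dropped and re-entered inside a loop. A condensed sketch of that pattern (the loop-body helpers are hypothetical stand-ins for the bucket locking and deletion done above):

    GlobalCounter::CSContext cs_context = GlobalCounter::critical_section_begin(thread);
    for (size_t i = 0; i < limit; i++) {
      if (bucket_needs_deletes(i)) {                            // hypothetical predicate
        GlobalCounter::critical_section_end(thread, cs_context);
        delete_in_bucket_locked(i);                             // hypothetical: lock, unlink, synchronize, free
        cs_context = GlobalCounter::critical_section_begin(thread);
      }
    }
    GlobalCounter::critical_section_end(thread, cs_context);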
--- a/src/hotspot/share/utilities/globalCounter.cpp	Tue Oct 30 14:38:59 2018 -0700
+++ b/src/hotspot/share/utilities/globalCounter.cpp	Tue Oct 30 18:06:35 2018 -0400
@@ -59,8 +59,8 @@
 void GlobalCounter::write_synchronize() {
   assert((*Thread::current()->get_rcu_counter() & COUNTER_ACTIVE) == 0x0, "must be outside a critical section");
   // Atomic::add must provide fence since we have storeload dependency.
-  volatile uintx gbl_cnt = Atomic::add((uintx)COUNTER_INCREMENT, &_global_counter._counter,
-                                       memory_order_conservative);
+  uintx gbl_cnt = Atomic::add(COUNTER_INCREMENT, &_global_counter._counter);
+
   // Handle bootstrap
   if (Threads::number_of_threads() == 0) {
     return;
--- a/src/hotspot/share/utilities/globalCounter.hpp	Tue Oct 30 14:38:59 2018 -0700
+++ b/src/hotspot/share/utilities/globalCounter.hpp	Tue Oct 30 18:06:35 2018 -0400
@@ -33,8 +33,9 @@
 // The GlobalCounter provides a synchronization mechanism between threads for
 // safe memory reclamation and other ABA problems. All readers must call
 // critical_section_begin before reading the volatile data and
-// critical_section_end afterwards. The write side must call write_synchronize
-// before reclaming the memory. The read-path only does an uncontented store
+// critical_section_end afterwards. Such read-side critical sections may
+// be properly nested. The write side must call write_synchronize
+// before reclaiming the memory. The read-path only does an uncontended store
 // to a thread-local-storage and fence to stop any loads from floating up, thus
 // light weight and wait-free. The write-side is more heavy since it must check
 // all readers and wait until they have left the generation. (a system memory
@@ -62,20 +63,26 @@
   class CounterThreadCheck;
 
  public:
-  // Must be called before accessing the data. Only threads accessible lock-free
-  // can used this. Those included now are all Threads on SMR ThreadsList and
-  // the VMThread. Nesting is not yet supported.
-  static void critical_section_begin(Thread *thread);
+  // The type of the critical section context passed from
+  // critical_section_begin() to critical_section_end().
+  typedef uintx CSContext;
 
-  // Must be called after finished accessing the data.
-  // Do not provide fence, allows load/stores moving into the critical section.
-  static void critical_section_end(Thread *thread);
+  // Must be called before accessing the data.  The result must be passed
+  // to the associated call to critical_section_end().  Acts as a full
+  // memory barrier before the code within the critical section.
+  static CSContext critical_section_begin(Thread *thread);
+
+  // Must be called after finished accessing the data.  The context
+  // must be the result of the associated initiating critical_section_begin().
+  // Acts as a release memory barrier after the code within the critical
+  // section.
+  static void critical_section_end(Thread *thread, CSContext context);
 
   // Make the data inaccessible to readers before calling. When this call
-  // returns it's safe to reclaim the data.
+  // returns it's safe to reclaim the data.  Acts as a full memory barrier.
   static void write_synchronize();
 
-  // A scoped object for a reads-side critical-section.
+  // A scoped object for a read-side critical-section.
   class CriticalSection;
 };
 
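An illustrative writer-side pairing for the protocol documented above (a sketch; unlink_node and free_node are hypothetical helpers, not part of this API):

    // 1. Make the data unreachable to new readers.
    Node* n = unlink_node(table, key);   // hypothetical
    // 2. Full barrier; waits until all extant read-side critical sections exit.
    GlobalCounter::write_synchronize();
    // 3. No reader can still hold a reference; safe to reclaim.
    free_node(n);                        // hypothetical
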
--- a/src/hotspot/share/utilities/globalCounter.inline.hpp	Tue Oct 30 14:38:59 2018 -0700
+++ b/src/hotspot/share/utilities/globalCounter.inline.hpp	Tue Oct 30 18:06:35 2018 -0400
@@ -25,34 +25,46 @@
 #ifndef SHARE_UTILITIES_GLOBAL_COUNTER_INLINE_HPP
 #define SHARE_UTILITIES_GLOBAL_COUNTER_INLINE_HPP
 
+#include "runtime/atomic.hpp"
 #include "runtime/orderAccess.hpp"
 #include "runtime/thread.inline.hpp"
 #include "utilities/globalCounter.hpp"
 
-inline void GlobalCounter::critical_section_begin(Thread *thread) {
+inline GlobalCounter::CSContext
+GlobalCounter::critical_section_begin(Thread *thread) {
   assert(thread == Thread::current(), "must be current thread");
-  assert((*thread->get_rcu_counter() & COUNTER_ACTIVE) == 0x0, "nested critical sections, not supported yet");
-  uintx gbl_cnt = OrderAccess::load_acquire(&_global_counter._counter);
-  OrderAccess::release_store_fence(thread->get_rcu_counter(), gbl_cnt | COUNTER_ACTIVE);
+  uintx old_cnt = Atomic::load(thread->get_rcu_counter());
+  // Retain the old counter value if already active, e.g. nested.
+  // Otherwise, set the counter to the current version + active bit.
+  uintx new_cnt = old_cnt;
+  if ((new_cnt & COUNTER_ACTIVE) == 0) {
+    new_cnt = Atomic::load(&_global_counter._counter) | COUNTER_ACTIVE;
+  }
+  OrderAccess::release_store_fence(thread->get_rcu_counter(), new_cnt);
+  return static_cast<CSContext>(old_cnt);
 }
 
-inline void GlobalCounter::critical_section_end(Thread *thread) {
+inline void
+GlobalCounter::critical_section_end(Thread *thread, CSContext context) {
   assert(thread == Thread::current(), "must be current thread");
   assert((*thread->get_rcu_counter() & COUNTER_ACTIVE) == COUNTER_ACTIVE, "must be in critical section");
-  // Mainly for debugging we set it to 'now'.
-  uintx gbl_cnt = OrderAccess::load_acquire(&_global_counter._counter);
-  OrderAccess::release_store(thread->get_rcu_counter(), gbl_cnt);
+  // Restore the counter value from before the associated begin.
+  OrderAccess::release_store(thread->get_rcu_counter(),
+                             static_cast<uintx>(context));
 }
 
 class GlobalCounter::CriticalSection {
  private:
   Thread* _thread;
+  CSContext _context;
  public:
-  inline CriticalSection(Thread* thread) : _thread(thread) {
-    GlobalCounter::critical_section_begin(_thread);
-  }
+  inline CriticalSection(Thread* thread) :
+    _thread(thread),
+    _context(GlobalCounter::critical_section_begin(_thread))
+  {}
+
   inline  ~CriticalSection() {
-    GlobalCounter::critical_section_end(_thread);
+    GlobalCounter::critical_section_end(_thread, _context);
   }
 };
 
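A sketch of nested use of the explicit API, matching the implementation above: begin returns the prior thread-local value; an inner begin finds COUNTER_ACTIVE already set and leaves the value unchanged; each end restores its own saved context, so only the outermost end returns the thread to the inactive state.

    GlobalCounter::CSContext outer_ctx = GlobalCounter::critical_section_begin(self);
    // TLS counter is now <global version> | COUNTER_ACTIVE; outer_ctx holds the inactive value.
    GlobalCounter::CSContext inner_ctx = GlobalCounter::critical_section_begin(self);
    // TLS counter unchanged; inner_ctx holds the active value.
    GlobalCounter::critical_section_end(self, inner_ctx);  // restores the active value: still in the outer section
    GlobalCounter::critical_section_end(self, outer_ctx);  // restores the inactive value: fully exited
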
--- a/src/hotspot/share/utilities/singleWriterSynchronizer.hpp	Tue Oct 30 14:38:59 2018 -0700
+++ b/src/hotspot/share/utilities/singleWriterSynchronizer.hpp	Tue Oct 30 18:06:35 2018 -0400
@@ -36,7 +36,7 @@
 // Any number of threads may enter critical sections associated with a
 // synchronizer object.  One (at a time) other thread may wait for the
 // completion of all critical sections for the synchronizer object
-// that were extent when the wait was initiated.  Usage is that there
+// that were extant when the wait was initiated.  Usage is that there
 // is some state that can be accessed either before or after some
 // change.  An accessing thread performs the access within a critical
 // section.  A writer thread performs the state change, and then waits
@@ -46,9 +46,7 @@
 // Generally, GlobalCounter should be used instead of this class, as
 // GlobalCounter has measurably better performance and doesn't have
 // the single writer at a time restriction.  Use this only in
-// situations where GlobalCounter won't work for some reason, such as
-// nesting.  But note that nesting often indicates other problems, and
-// may risk deadlock.
+// situations where GlobalCounter won't work for some reason.
 class SingleWriterSynchronizer {
   volatile uint _enter;
   volatile uint _exit[2];
--- a/test/hotspot/gtest/utilities/test_globalCounter.cpp	Tue Oct 30 14:38:59 2018 -0700
+++ b/test/hotspot/gtest/utilities/test_globalCounter.cpp	Tue Oct 30 18:06:35 2018 -0400
@@ -47,11 +47,11 @@
   void main_run() {
     _wrt_start->signal();
     while (!_exit) {
-      GlobalCounter::critical_section_begin(this);
+      GlobalCounter::CSContext cs_context = GlobalCounter::critical_section_begin(this);
       volatile TestData* test = OrderAccess::load_acquire(_test);
       long value = OrderAccess::load_acquire(&test->test_value);
       ASSERT_EQ(value, GOOD_VALUE);
-      GlobalCounter::critical_section_end(this);
+      GlobalCounter::critical_section_end(this, cs_context);
       {
         GlobalCounter::CriticalSection cs(this);
         volatile TestData* test = OrderAccess::load_acquire(_test);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/gtest/utilities/test_globalCounter_nested.cpp	Tue Oct 30 18:06:35 2018 -0400
@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include "precompiled.hpp"
+#include "metaprogramming/isRegisteredEnum.hpp"
+#include "runtime/atomic.hpp"
+#include "runtime/orderAccess.hpp"
+#include "runtime/os.hpp"
+#include "utilities/globalCounter.hpp"
+#include "utilities/globalCounter.inline.hpp"
+#include "utilities/spinYield.hpp"
+#include "threadHelper.inline.hpp"
+
+enum NestedTestState {
+  START,
+  START_WAIT,
+  OUTER_ENTERED,
+  INNER_ENTERED,
+  INNER_EXITED,
+  OUTER_EXITED,
+  SYNCHRONIZING,
+  SYNCHRONIZED
+};
+template<> struct IsRegisteredEnum<NestedTestState> : public TrueType {};
+
+class RCUNestedThread : public JavaTestThread {
+  volatile NestedTestState _state;
+  volatile bool _proceed;
+
+protected:
+  RCUNestedThread(Semaphore* post) :
+    JavaTestThread(post),
+    _state(START),
+    _proceed(false)
+  {}
+
+  ~RCUNestedThread() {}
+
+  void set_state(NestedTestState new_state) {
+    OrderAccess::release_store(&_state, new_state);
+  }
+
+  void wait_with_state(NestedTestState new_state) {
+    SpinYield spinner;
+    OrderAccess::release_store(&_state, new_state);
+    while (!OrderAccess::load_acquire(&_proceed)) {
+      spinner.wait();
+    }
+    OrderAccess::release_store(&_proceed, false);
+  }
+
+public:
+  NestedTestState state() const {
+    return OrderAccess::load_acquire(&_state);
+  }
+
+  void wait_for_state(NestedTestState goal) {
+    SpinYield spinner;
+    while (state() != goal) {
+      spinner.wait();
+    }
+  }
+
+  void proceed() {
+    OrderAccess::release_store(&_proceed, true);
+  }
+};
+
+class RCUNestedReaderThread : public RCUNestedThread {
+public:
+  RCUNestedReaderThread(Semaphore* post) :
+    RCUNestedThread(post)
+  {}
+
+  virtual void main_run();
+};
+
+void RCUNestedReaderThread::main_run() {
+  wait_with_state(START_WAIT);
+  {
+    GlobalCounter::CriticalSection outer(Thread::current());
+    wait_with_state(OUTER_ENTERED);
+    {
+      GlobalCounter::CriticalSection inner(Thread::current());
+      wait_with_state(INNER_ENTERED);
+    }
+    wait_with_state(INNER_EXITED);
+  }
+  wait_with_state(OUTER_EXITED);
+}
+
+
+class RCUNestedWriterThread : public RCUNestedThread {
+public:
+  RCUNestedWriterThread(Semaphore* post) :
+    RCUNestedThread(post)
+  {}
+
+  virtual void main_run();
+};
+
+void RCUNestedWriterThread::main_run() {
+  wait_with_state(START_WAIT);
+  set_state(SYNCHRONIZING);
+  GlobalCounter::write_synchronize();
+  wait_with_state(SYNCHRONIZED);
+}
+
+TEST_VM(GlobalCounter, nested_critical_section) {
+  Semaphore post;
+  RCUNestedReaderThread* reader = new RCUNestedReaderThread(&post);
+  RCUNestedWriterThread* outer = new RCUNestedWriterThread(&post);
+  RCUNestedWriterThread* inner = new RCUNestedWriterThread(&post);
+
+  reader->doit();
+  outer->doit();
+  inner->doit();
+
+  reader->wait_for_state(START_WAIT);
+  outer->wait_for_state(START_WAIT);
+  inner->wait_for_state(START_WAIT);
+  EXPECT_EQ(START_WAIT, reader->state());
+  EXPECT_EQ(START_WAIT, outer->state());
+  EXPECT_EQ(START_WAIT, inner->state());
+
+  reader->proceed();
+  reader->wait_for_state(OUTER_ENTERED);
+  EXPECT_EQ(OUTER_ENTERED, reader->state());
+  EXPECT_EQ(START_WAIT, outer->state());
+  EXPECT_EQ(START_WAIT, inner->state());
+
+  outer->proceed();
+  outer->wait_for_state(SYNCHRONIZING);
+  EXPECT_EQ(OUTER_ENTERED, reader->state());
+  EXPECT_EQ(SYNCHRONIZING, outer->state());
+  EXPECT_EQ(START_WAIT, inner->state());
+
+  os::naked_short_sleep(100);   // Give outer time in synchronization.
+  EXPECT_EQ(OUTER_ENTERED, reader->state());
+  EXPECT_EQ(SYNCHRONIZING, outer->state());
+  EXPECT_EQ(START_WAIT, inner->state());
+
+  reader->proceed();
+  reader->wait_for_state(INNER_ENTERED);
+  EXPECT_EQ(INNER_ENTERED, reader->state());
+  EXPECT_EQ(SYNCHRONIZING, outer->state());
+  EXPECT_EQ(START_WAIT, inner->state());
+
+  inner->proceed();
+  inner->wait_for_state(SYNCHRONIZING);
+  EXPECT_EQ(INNER_ENTERED, reader->state());
+  EXPECT_EQ(SYNCHRONIZING, outer->state());
+  EXPECT_EQ(SYNCHRONIZING, inner->state());
+
+  os::naked_short_sleep(100); // Give writers time in synchronization.
+  EXPECT_EQ(INNER_ENTERED, reader->state());
+  EXPECT_EQ(SYNCHRONIZING, outer->state());
+  EXPECT_EQ(SYNCHRONIZING, inner->state());
+
+  reader->proceed();
+  reader->wait_for_state(INNER_EXITED);
+  // inner does *not* complete synchronization here.
+  EXPECT_EQ(INNER_EXITED, reader->state());
+  EXPECT_EQ(SYNCHRONIZING, outer->state());
+  EXPECT_EQ(SYNCHRONIZING, inner->state());
+
+  os::naked_short_sleep(100); // Give writers more time in synchronization.
+  EXPECT_EQ(INNER_EXITED, reader->state());
+  EXPECT_EQ(SYNCHRONIZING, outer->state());
+  EXPECT_EQ(SYNCHRONIZING, inner->state());
+
+  reader->proceed();
+  reader->wait_for_state(OUTER_EXITED);
+  // Both inner and outer can synchronize now.
+  outer->wait_for_state(SYNCHRONIZED);
+  inner->wait_for_state(SYNCHRONIZED);
+  EXPECT_EQ(OUTER_EXITED, reader->state());
+  EXPECT_EQ(SYNCHRONIZED, outer->state());
+  EXPECT_EQ(SYNCHRONIZED, inner->state());
+
+  // Wait for reader, outer, and inner to complete.
+  reader->proceed();
+  outer->proceed();
+  inner->proceed();
+  for (uint i = 0; i < 3; ++i) {
+    post.wait();
+  }
+}