8212826: Make PtrQueue free list lock-free
Summary: Add lock-free stack and use in BufferNode::Allocator.
Reviewed-by: tschatzl, sangheki
--- a/src/hotspot/share/gc/g1/g1BarrierSet.cpp Sat Jan 19 11:20:01 2019 +0100
+++ b/src/hotspot/share/gc/g1/g1BarrierSet.cpp Sat Jan 19 19:50:01 2019 -0500
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2001, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -55,8 +55,8 @@
make_barrier_set_c2<G1BarrierSetC2>(),
card_table,
BarrierSet::FakeRtti(BarrierSet::G1BarrierSet)),
- _satb_mark_queue_buffer_allocator(G1SATBBufferSize, SATB_Q_FL_lock),
- _dirty_card_queue_buffer_allocator(G1UpdateBufferSize, DirtyCardQ_FL_lock),
+ _satb_mark_queue_buffer_allocator("SATB Buffer Allocator", G1SATBBufferSize),
+ _dirty_card_queue_buffer_allocator("DC Buffer Allocator", G1UpdateBufferSize),
_satb_mark_queue_set(),
_dirty_card_queue_set()
{}
--- a/src/hotspot/share/gc/shared/ptrQueue.cpp Sat Jan 19 11:20:01 2019 +0100
+++ b/src/hotspot/share/gc/shared/ptrQueue.cpp Sat Jan 19 19:50:01 2019 -0500
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2001, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -24,12 +24,15 @@
#include "precompiled.hpp"
#include "gc/shared/ptrQueue.hpp"
+#include "logging/log.hpp"
#include "memory/allocation.hpp"
#include "memory/allocation.inline.hpp"
#include "runtime/atomic.hpp"
#include "runtime/mutex.hpp"
#include "runtime/mutexLocker.hpp"
+#include "runtime/orderAccess.hpp"
#include "runtime/thread.inline.hpp"
+#include "utilities/globalCounter.inline.hpp"
#include <new>
@@ -85,20 +88,29 @@
FREE_C_HEAP_ARRAY(char, node);
}
-BufferNode::Allocator::Allocator(size_t buffer_size, Mutex* lock) :
+BufferNode::Allocator::Allocator(const char* name, size_t buffer_size) :
_buffer_size(buffer_size),
- _lock(lock),
- _free_list(NULL),
- _free_count(0)
+ _pending_list(),
+ _free_list(),
+ _pending_count(0),
+ _free_count(0),
+ _transfer_lock(false)
{
- assert(lock != NULL, "precondition");
+ strncpy(_name, name, sizeof(_name));
+ _name[sizeof(_name) - 1] = '\0';
}
BufferNode::Allocator::~Allocator() {
- while (_free_list != NULL) {
- BufferNode* node = _free_list;
- _free_list = node->next();
- BufferNode::deallocate(node);
+ delete_list(_free_list.pop_all());
+ delete_list(_pending_list.pop_all());
+}
+
+void BufferNode::Allocator::delete_list(BufferNode* list) {
+ while (list != NULL) {
+ BufferNode* next = list->next();
+ DEBUG_ONLY(list->set_next(NULL);)
+ BufferNode::deallocate(list);
+ list = next;
}
}
@@ -107,55 +119,109 @@
}
BufferNode* BufferNode::Allocator::allocate() {
- BufferNode* node = NULL;
+ BufferNode* node;
{
- MutexLockerEx ml(_lock, Mutex::_no_safepoint_check_flag);
- node = _free_list;
- if (node != NULL) {
- _free_list = node->next();
- --_free_count;
- node->set_next(NULL);
- node->set_index(0);
- return node;
- }
+ // Protect against ABA; see release().
+ GlobalCounter::CriticalSection cs(Thread::current());
+ node = _free_list.pop();
}
- return BufferNode::allocate(_buffer_size);
+ if (node == NULL) {
+ node = BufferNode::allocate(_buffer_size);
+ } else {
+ // Decrement count after getting buffer from free list. This, along
+ // with incrementing count before adding to free list, ensures count
+ // never underflows.
+ size_t count = Atomic::sub(1u, &_free_count);
+ assert((count + 1) != 0, "_free_count underflow");
+ }
+ return node;
}
+// To solve the ABA problem for lock-free stack pop, allocate does the
+// pop inside a critical section, and release synchronizes on the
+// critical sections before adding to the _free_list. But we don't
+// want to make every release have to do a synchronize. Instead, we
+// initially place released nodes on the _pending_list, and transfer
+// them to the _free_list in batches. Only one transfer at a time is
+// permitted, with a lock bit to control access to that phase. A
+// transfer takes all the nodes from the _pending_list, synchronizes on
+// the _free_list pops, and then adds the former pending nodes to the
+// _free_list. While that's happening, other threads might be adding
+// other nodes to the _pending_list, to be dealt with by some later
+// transfer.
void BufferNode::Allocator::release(BufferNode* node) {
- MutexLockerEx ml(_lock, Mutex::_no_safepoint_check_flag);
- node->set_next(_free_list);
- _free_list = node;
- ++_free_count;
+ assert(node != NULL, "precondition");
+ assert(node->next() == NULL, "precondition");
+
+ // Desired minimum transfer batch size. There is relatively little
+ // importance to the specific number. It shouldn't be too big, else
+ // we're wasting space when the release rate is low. If the release
+ // rate is high, we might accumulate more than this before being
+ // able to start a new transfer, but that's okay. Also note that
+ // the allocation rate and the release rate are going to be fairly
+ // similar, due to how the buffers are used.
+ const size_t trigger_transfer = 10;
+
+ // Add to pending list. Update count first so no underflow in transfer.
+ size_t pending_count = Atomic::add(1u, &_pending_count);
+ _pending_list.push(*node);
+ if (pending_count > trigger_transfer) {
+ try_transfer_pending();
+ }
}
-void BufferNode::Allocator::reduce_free_list() {
- BufferNode* head = NULL;
- {
- MutexLockerEx ml(_lock, Mutex::_no_safepoint_check_flag);
- // For now, delete half.
- size_t remove = _free_count / 2;
- if (remove > 0) {
- head = _free_list;
- BufferNode* tail = head;
- BufferNode* prev = NULL;
- for (size_t i = 0; i < remove; ++i) {
- assert(tail != NULL, "free list size is wrong");
- prev = tail;
- tail = tail->next();
- }
- assert(prev != NULL, "invariant");
- assert(prev->next() == tail, "invariant");
- prev->set_next(NULL);
- _free_list = tail;
- _free_count -= remove;
+// Try to transfer nodes from _pending_list to _free_list, with a
+// synchronization delay for any in-progress pops from the _free_list,
+// to solve ABA there. Return true if a (possibly empty) transfer was
+// performed, or false if blocked from doing so by some other thread's
+// in-progress transfer.
+bool BufferNode::Allocator::try_transfer_pending() {
+ // Attempt to claim the lock.
+ if (Atomic::load(&_transfer_lock) || // Skip CAS if likely to fail.
+ Atomic::cmpxchg(true, &_transfer_lock, false)) {
+ return false;
+ }
+ // Have the lock; perform the transfer.
+
+ // Claim all the pending nodes.
+ BufferNode* first = _pending_list.pop_all();
+ if (first != NULL) {
+ // Prepare to add the claimed nodes, and update _pending_count.
+ BufferNode* last = first;
+ size_t count = 1;
+ for (BufferNode* next = first->next(); next != NULL; next = next->next()) {
+ last = next;
+ ++count;
}
+ Atomic::sub(count, &_pending_count);
+
+ // Wait for any in-progress pops, to avoid ABA for them.
+ GlobalCounter::write_synchronize();
+
+ // Add synchronized nodes to _free_list.
+ // Update count first so no underflow in allocate().
+ Atomic::add(count, &_free_count);
+ _free_list.prepend(*first, *last);
+ log_trace(gc, ptrqueue, freelist)
+ ("Transferred %s pending to free: " SIZE_FORMAT, name(), count);
}
- while (head != NULL) {
- BufferNode* next = head->next();
- BufferNode::deallocate(head);
- head = next;
+ OrderAccess::release_store(&_transfer_lock, false);
+ return true;
+}
+
+size_t BufferNode::Allocator::reduce_free_list(size_t remove_goal) {
+ try_transfer_pending();
+ size_t removed = 0;
+ for ( ; removed < remove_goal; ++removed) {
+ BufferNode* node = _free_list.pop();
+ if (node == NULL) break;
+ BufferNode::deallocate(node);
}
+ size_t new_count = Atomic::sub(removed, &_free_count);
+ log_debug(gc, ptrqueue, freelist)
+ ("Reduced %s free list by " SIZE_FORMAT " to " SIZE_FORMAT,
+ name(), removed, new_count);
+ return removed;
}
PtrQueueSet::PtrQueueSet(bool notify_when_complete) :
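
The release/transfer protocol implemented above reduces to roughly the following shape. This is an illustrative sketch, not part of the patch; the example_* names are hypothetical, and the headers in scope are assumed to be the ones this change already includes (ptrQueue.hpp, thread.inline.hpp, globalCounter.inline.hpp). The real logic lives in Allocator::allocate() and Allocator::try_transfer_pending().

    // Illustrative sketch of the ABA-avoidance protocol; example_* names
    // are hypothetical and not part of this change.
    BufferNode* example_allocate(BufferNode::Stack& free_list) {
      // Pop inside a critical section so a concurrent transfer cannot
      // recycle the node out from under us (see example_transfer below).
      GlobalCounter::CriticalSection cs(Thread::current());
      return free_list.pop();
    }

    void example_transfer(BufferNode::Stack& pending_list,
                          BufferNode::Stack& free_list) {
      BufferNode* first = pending_list.pop_all();  // claim all pending nodes
      if (first == NULL) return;
      BufferNode* last = first;
      while (last->next() != NULL) {               // find the tail of the batch
        last = last->next();
      }
      GlobalCounter::write_synchronize();          // wait for in-progress pops
      free_list.prepend(*first, *last);            // now safe to publish
    }
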
--- a/src/hotspot/share/gc/shared/ptrQueue.hpp Sat Jan 19 11:20:01 2019 +0100
+++ b/src/hotspot/share/gc/shared/ptrQueue.hpp Sat Jan 19 19:50:01 2019 -0500
@@ -25,7 +25,10 @@
#ifndef SHARE_GC_SHARED_PTRQUEUE_HPP
#define SHARE_GC_SHARED_PTRQUEUE_HPP
+#include "memory/padded.hpp"
#include "utilities/align.hpp"
+#include "utilities/debug.hpp"
+#include "utilities/lockFreeStack.hpp"
#include "utilities/sizes.hpp"
class Mutex;
@@ -215,7 +218,7 @@
class BufferNode {
size_t _index;
- BufferNode* _next;
+ BufferNode* volatile _next;
void* _buffer[1]; // Pseudo flexible array member.
BufferNode() : _index(0), _next(NULL) { }
@@ -225,6 +228,8 @@
return offset_of(BufferNode, _buffer);
}
+ static BufferNode* volatile* next_ptr(BufferNode& bn) { return &bn._next; }
+
AIX_ONLY(public:) // xlC 12 on AIX doesn't implement C++ DR45.
// Allocate a new BufferNode with the "buffer" having size elements.
static BufferNode* allocate(size_t size);
@@ -233,6 +238,8 @@
static void deallocate(BufferNode* node);
public:
+ typedef LockFreeStack<BufferNode, &next_ptr> Stack;
+
BufferNode* next() const { return _next; }
void set_next(BufferNode* n) { _next = n; }
size_t index() const { return _index; }
@@ -254,23 +261,52 @@
reinterpret_cast<char*>(node) + buffer_offset());
}
- // Free-list based allocator.
- class Allocator {
- size_t _buffer_size;
- Mutex* _lock;
- BufferNode* _free_list;
- volatile size_t _free_count;
+ class Allocator; // Free-list based allocator.
+ class TestSupport; // Unit test support.
+};
+
+// Allocation is based on a lock-free free list of nodes, linked through
+// BufferNode::_next (see BufferNode::Stack). To solve the ABA problem,
+// popping a node from the free list is performed within a GlobalCounter
+// critical section, and pushing nodes onto the free list is done after
+// a GlobalCounter synchronization associated with the nodes to be pushed.
+// This is documented behavior so that other parts of the node life-cycle
+// can depend on and make use of it too.
+class BufferNode::Allocator {
+ friend class TestSupport;
+
+  // Since we don't expect many instances, and padding gave a measured
+  // >15% speedup on the stress gtest, it seems like a good tradeoff here.
+#define DECLARE_PADDED_MEMBER(Id, Type, Name) \
+ Type Name; DEFINE_PAD_MINUS_SIZE(Id, DEFAULT_CACHE_LINE_SIZE, sizeof(Type))
- public:
- Allocator(size_t buffer_size, Mutex* lock);
- ~Allocator();
+ const size_t _buffer_size;
+ char _name[DEFAULT_CACHE_LINE_SIZE - sizeof(size_t)]; // Use name as padding.
+ DECLARE_PADDED_MEMBER(1, Stack, _pending_list);
+ DECLARE_PADDED_MEMBER(2, Stack, _free_list);
+ DECLARE_PADDED_MEMBER(3, volatile size_t, _pending_count);
+ DECLARE_PADDED_MEMBER(4, volatile size_t, _free_count);
+ DECLARE_PADDED_MEMBER(5, volatile bool, _transfer_lock);
+
+#undef DECLARE_PADDED_MEMBER
+
+ void delete_list(BufferNode* list);
+ bool try_transfer_pending();
- size_t buffer_size() const { return _buffer_size; }
- size_t free_count() const;
- BufferNode* allocate();
- void release(BufferNode* node);
- void reduce_free_list();
- };
+public:
+ Allocator(const char* name, size_t buffer_size);
+ ~Allocator();
+
+ const char* name() const { return _name; }
+ size_t buffer_size() const { return _buffer_size; }
+ size_t free_count() const;
+ BufferNode* allocate();
+ void release(BufferNode* node);
+
+ // Deallocate some of the available buffers. remove_goal is the target
+ // number to remove. Returns the number actually deallocated, which may
+ // be less than the goal if there were fewer available.
+ size_t reduce_free_list(size_t remove_goal);
};
// A PtrQueueSet represents resources common to a set of pointer queues.
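
For context, a client of the reworked allocator interface looks roughly like this; the allocator name, buffer size, and removal goal are arbitrary placeholders rather than values taken from the patch:

    // Hypothetical client of the lock-free allocator interface.
    BufferNode::Allocator allocator("Example Buffer Allocator", 256);
    BufferNode* node = allocator.allocate();          // reuse a free node or allocate a new one
    // ... fill the node's buffer and hand it off ...
    allocator.release(node);                          // lands on the pending list first
    size_t removed = allocator.reduce_free_list(10);  // trim up to 10 free buffers
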
--- a/src/hotspot/share/logging/logTag.hpp Sat Jan 19 11:20:01 2019 +0100
+++ b/src/hotspot/share/logging/logTag.hpp Sat Jan 19 19:50:01 2019 -0500
@@ -133,6 +133,7 @@
LOG_TAG(reloc) \
LOG_TAG(remset) \
LOG_TAG(parser) \
+ LOG_TAG(ptrqueue) \
LOG_TAG(purge) \
LOG_TAG(resolve) \
LOG_TAG(safepoint) \
--- a/src/hotspot/share/memory/padded.hpp Sat Jan 19 11:20:01 2019 +0100
+++ b/src/hotspot/share/memory/padded.hpp Sat Jan 19 19:50:01 2019 -0500
@@ -25,6 +25,7 @@
#ifndef SHARE_MEMORY_PADDED_HPP
#define SHARE_MEMORY_PADDED_HPP
+#include "memory/allocation.hpp"
#include "utilities/align.hpp"
#include "utilities/globalDefinitions.hpp"
--- a/src/hotspot/share/runtime/mutexLocker.cpp Sat Jan 19 11:20:01 2019 +0100
+++ b/src/hotspot/share/runtime/mutexLocker.cpp Sat Jan 19 19:50:01 2019 -0500
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -79,10 +79,8 @@
Monitor* CGC_lock = NULL;
Monitor* STS_lock = NULL;
Monitor* FullGCCount_lock = NULL;
-Mutex* SATB_Q_FL_lock = NULL;
Monitor* SATB_Q_CBL_mon = NULL;
Mutex* Shared_SATB_Q_lock = NULL;
-Mutex* DirtyCardQ_FL_lock = NULL;
Monitor* DirtyCardQ_CBL_mon = NULL;
Mutex* Shared_DirtyCardQ_lock = NULL;
Mutex* MarkStackFreeList_lock = NULL;
@@ -214,11 +212,9 @@
def(FullGCCount_lock , PaddedMonitor, leaf, true, Monitor::_safepoint_check_never); // in support of ExplicitGCInvokesConcurrent
}
if (UseG1GC) {
- def(SATB_Q_FL_lock , PaddedMutex , access, true, Monitor::_safepoint_check_never);
def(SATB_Q_CBL_mon , PaddedMonitor, access, true, Monitor::_safepoint_check_never);
def(Shared_SATB_Q_lock , PaddedMutex , access + 1, true, Monitor::_safepoint_check_never);
- def(DirtyCardQ_FL_lock , PaddedMutex , access, true, Monitor::_safepoint_check_never);
def(DirtyCardQ_CBL_mon , PaddedMonitor, access, true, Monitor::_safepoint_check_never);
def(Shared_DirtyCardQ_lock , PaddedMutex , access + 1, true, Monitor::_safepoint_check_never);
@@ -235,7 +231,6 @@
def(MonitoringSupport_lock , PaddedMutex , native , true, Monitor::_safepoint_check_never); // used for serviceability monitoring support
}
if (UseShenandoahGC) {
- def(SATB_Q_FL_lock , PaddedMutex , access, true, Monitor::_safepoint_check_never);
def(SATB_Q_CBL_mon , PaddedMonitor, access, true, Monitor::_safepoint_check_never);
def(Shared_SATB_Q_lock , PaddedMutex , access + 1, true, Monitor::_safepoint_check_never);
--- a/src/hotspot/share/runtime/mutexLocker.hpp Sat Jan 19 11:20:01 2019 +0100
+++ b/src/hotspot/share/runtime/mutexLocker.hpp Sat Jan 19 19:50:01 2019 -0500
@@ -76,16 +76,12 @@
// fore- & background GC threads.
extern Monitor* STS_lock; // used for joining/leaving SuspendibleThreadSet.
extern Monitor* FullGCCount_lock; // in support of "concurrent" full gc
-extern Mutex* SATB_Q_FL_lock; // Protects SATB Q
- // buffer free list.
extern Monitor* SATB_Q_CBL_mon; // Protects SATB Q
// completed buffer queue.
extern Mutex* Shared_SATB_Q_lock; // Lock protecting SATB
// queue shared by
// non-Java threads.
-extern Mutex* DirtyCardQ_FL_lock; // Protects dirty card Q
- // buffer free list.
extern Monitor* DirtyCardQ_CBL_mon; // Protects dirty card Q
// completed buffer queue.
extern Mutex* Shared_DirtyCardQ_lock; // Lock protecting dirty card
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/share/utilities/lockFreeStack.hpp Sat Jan 19 19:50:01 2019 -0500
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_UTILITIES_LOCKFREESTACK_HPP
+#define SHARE_UTILITIES_LOCKFREESTACK_HPP
+
+#include "runtime/atomic.hpp"
+#include "utilities/debug.hpp"
+#include "utilities/macros.hpp"
+
+// The LockFreeStack class template provides a lock-free LIFO. The objects
+// in the sequence are intrusively linked via a member in the objects. As
+// a result, there is no allocation involved in adding objects to the stack
+// or removing them from the stack.
+//
+// To be used in a LockFreeStack of objects of type T, an object of
+// type T must have a list entry member of type T* volatile, with a
+// non-member accessor function returning a pointer to that member. A
+// LockFreeStack is associated with the class of its elements and an
+// entry member from that class.
+//
+// An object can be in multiple stacks at the same time, so long as
+// each stack uses a different entry member. That is, the class of the
+// object must have multiple LockFreeStack entry members, one for each
+// stack in which the object may simultaneously be an element.
+//
+// LockFreeStacks support polymorphic elements. Because the objects
+// in a stack are externally managed, rather than being embedded
+// values in the stack, the actual type of such objects may be more
+// specific than the stack's element type.
+//
+// \tparam T is the class of the elements in the stack.
+//
+// \tparam next_ptr is a function pointer. Applying this function to
+// an object of type T must return a pointer to the list entry member
+// of the object associated with the LockFreeStack type.
+template<typename T, T* volatile* (*next_ptr)(T&)>
+class LockFreeStack {
+ T* volatile _top;
+
+ void prepend_impl(T* first, T* last) {
+ T* cur = top();
+ T* old;
+ do {
+ old = cur;
+ set_next(*last, cur);
+ cur = Atomic::cmpxchg(first, &_top, cur);
+ } while (old != cur);
+ }
+
+ // Noncopyable.
+ LockFreeStack(const LockFreeStack&);
+ LockFreeStack& operator=(const LockFreeStack&);
+
+public:
+ LockFreeStack() : _top(NULL) {}
+ ~LockFreeStack() { assert(empty(), "stack not empty"); }
+
+ // Atomically removes the top object from this stack and returns a
+ // pointer to that object, or NULL if this stack is empty. Acts as a
+ // full memory barrier. Subject to ABA behavior; callers must ensure
+ // usage is safe.
+ T* pop() {
+ T* result = top();
+ T* old;
+ do {
+ old = result;
+ T* new_top = NULL;
+ if (result != NULL) {
+ new_top = next(*result);
+ }
+      // CAS even on empty pop, for consistent membar behavior.
+ result = Atomic::cmpxchg(new_top, &_top, result);
+ } while (result != old);
+ if (result != NULL) {
+ set_next(*result, NULL);
+ }
+ return result;
+ }
+
+ // Atomically exchange the list of elements with NULL, returning the old
+ // list of elements. Acts as a full memory barrier.
+ // postcondition: empty()
+ T* pop_all() {
+ return Atomic::xchg((T*)NULL, &_top);
+ }
+
+ // Atomically adds value to the top of this stack. Acts as a full
+ // memory barrier.
+ void push(T& value) {
+ assert(next(value) == NULL, "precondition");
+ prepend_impl(&value, &value);
+ }
+
+ // Atomically adds the list of objects (designated by first and
+ // last) before the objects already in this stack, in the same order
+ // as in the list. Acts as a full memory barrier.
+ // precondition: next(last) == NULL.
+ // postcondition: top() == &first, next(last) == old top().
+ void prepend(T& first, T& last) {
+ assert(next(last) == NULL, "precondition");
+#ifdef ASSERT
+ for (T* p = &first; p != &last; p = next(*p)) {
+ assert(p != NULL, "invalid prepend list");
+ }
+#endif
+ prepend_impl(&first, &last);
+ }
+
+ // Atomically adds the list of objects headed by first before the
+ // objects already in this stack, in the same order as in the list.
+ // Acts as a full memory barrier.
+ // postcondition: top() == &first.
+ void prepend(T& first) {
+ T* last = &first;
+ while (true) {
+ T* step_to = next(*last);
+ if (step_to == NULL) break;
+ last = step_to;
+ }
+ prepend_impl(&first, last);
+ }
+
+ // Return true if the stack is empty.
+ bool empty() const { return top() == NULL; }
+
+ // Return the most recently pushed element, or NULL if the stack is empty.
+ // The returned element is not removed from the stack.
+ T* top() const { return Atomic::load(&_top); }
+
+ // Return the number of objects in the stack. There must be no concurrent
+ // pops while the length is being determined.
+ size_t length() const {
+ size_t result = 0;
+ for (const T* current = top(); current != NULL; current = next(*current)) {
+ ++result;
+ }
+ return result;
+ }
+
+ // Return the entry following value in the list used by the
+ // specialized LockFreeStack class.
+ static T* next(const T& value) {
+ return Atomic::load(next_ptr(const_cast<T&>(value)));
+ }
+
+ // Set the entry following value to new_next in the list used by the
+ // specialized LockFreeStack class. Not thread-safe; in particular,
+ // if value is in an instance of this specialization of LockFreeStack,
+ // there must be no concurrent push or pop operations on that stack.
+ static void set_next(T& value, T* new_next) {
+ Atomic::store(new_next, next_ptr(value));
+ }
+};
+
+#endif // SHARE_UTILITIES_LOCKFREESTACK_HPP
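
A minimal usage sketch of the new template, mirroring the header documentation above; the Item type and its members are hypothetical (the gtest added below exercises the real API in the same way):

    // Illustrative element type with an intrusive entry member.
    class Item {
      Item* volatile _next_entry;
      static Item* volatile* next_ptr(Item& it) { return &it._next_entry; }
    public:
      Item() : _next_entry(NULL) {}
      typedef LockFreeStack<Item, &next_ptr> Stack;
    };

    void example() {
      Item a, b;
      Item::Stack stack;
      stack.push(a);
      stack.push(b);
      Item* first = stack.pop();   // &b: LIFO order
      Item* second = stack.pop();  // &a; stack must be empty when destroyed
      assert(first == &b && second == &a, "LIFO order");
      // Concurrent pops are subject to ABA; coordinate externally, e.g. with
      // the GlobalCounter scheme used by BufferNode::Allocator.
    }
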
--- a/test/hotspot/gtest/gc/shared/test_ptrQueueBufferAllocator.cpp Sat Jan 19 11:20:01 2019 +0100
+++ b/test/hotspot/gtest/gc/shared/test_ptrQueueBufferAllocator.cpp Sat Jan 19 19:50:01 2019 -0500
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -24,14 +24,37 @@
#include "precompiled.hpp"
#include "gc/shared/ptrQueue.hpp"
-#include "runtime/mutex.hpp"
+#include "memory/allocation.hpp"
+#include "runtime/interfaceSupport.inline.hpp"
+#include "runtime/orderAccess.hpp"
+#include "runtime/semaphore.inline.hpp"
+#include "runtime/thread.hpp"
+#include "utilities/globalCounter.inline.hpp"
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/ostream.hpp"
+#include "threadHelper.inline.hpp"
#include "unittest.hpp"
+class BufferNode::TestSupport : AllStatic {
+public:
+ static bool try_transfer_pending(Allocator* allocator) {
+ return allocator->try_transfer_pending();
+ }
+
+ class CompletedList;
+ class AllocatorThread;
+ class ProcessorThread;
+};
+
+typedef BufferNode::TestSupport::CompletedList CompletedList;
+typedef BufferNode::TestSupport::AllocatorThread AllocatorThread;
+typedef BufferNode::TestSupport::ProcessorThread ProcessorThread;
+
// Some basic testing of BufferNode::Allocator.
TEST_VM(PtrQueueBufferAllocatorTest, test) {
- Mutex m(Mutex::leaf, "PtrQueueBufferAllocatorTest",
- false, Mutex::_safepoint_check_never);
- BufferNode::Allocator allocator(256, &m);
+ const size_t buffer_size = 256;
+ BufferNode::Allocator allocator("Test Buffer Allocator", buffer_size);
+ ASSERT_EQ(buffer_size, allocator.buffer_size());
// Allocate some new nodes for use in testing.
BufferNode* nodes[10] = {};
@@ -44,8 +67,11 @@
// Release the nodes, adding them to the allocator's free list.
for (size_t i = 0; i < node_count; ++i) {
- ASSERT_EQ(i, allocator.free_count());
allocator.release(nodes[i]);
+ }
+ ASSERT_TRUE(BufferNode::TestSupport::try_transfer_pending(&allocator));
+ ASSERT_EQ(node_count, allocator.free_count());
+ for (size_t i = 0; i < node_count; ++i) {
if (i == 0) {
ASSERT_EQ((BufferNode*)NULL, nodes[i]->next());
} else {
@@ -56,7 +82,6 @@
// Allocate nodes from the free list.
for (size_t i = 0; i < node_count; ++i) {
size_t j = node_count - i;
- ASSERT_EQ(j, allocator.free_count());
ASSERT_EQ(nodes[j - 1], allocator.allocate());
}
ASSERT_EQ(0u, allocator.free_count());
@@ -65,11 +90,161 @@
for (size_t i = 0; i < node_count; ++i) {
allocator.release(nodes[i]);
}
+ ASSERT_TRUE(BufferNode::TestSupport::try_transfer_pending(&allocator));
ASSERT_EQ(node_count, allocator.free_count());
// Destroy some nodes in the free list.
// We don't have a way to verify destruction, but we can at
- // leat verify we don't crash along the way.
- allocator.reduce_free_list();
+ // least verify we don't crash along the way.
+ size_t count = allocator.free_count();
+ ASSERT_EQ(count, allocator.reduce_free_list(count));
// destroy allocator.
}
+
+// Stress test with lock-free allocator and completed buffer list.
+// Completed buffer list pop avoids ABA by also being in a critical
+// section that is synchronized by the allocator's release.
+
+class BufferNode::TestSupport::CompletedList {
+ BufferNode::Stack _completed_list;
+
+public:
+ CompletedList() : _completed_list() {}
+
+ ~CompletedList() {
+ assert(_completed_list.empty(), "completed list not empty");
+ }
+
+ void push(BufferNode* node) {
+ assert(node != NULL, "precondition");
+ _completed_list.push(*node);
+ }
+
+ BufferNode* pop() {
+ GlobalCounter::CriticalSection cs(Thread::current());
+ return _completed_list.pop();
+ }
+};
+
+// Simulate a mutator thread, allocating buffers and adding them to
+// the completed buffer list.
+class BufferNode::TestSupport::AllocatorThread : public JavaTestThread {
+ BufferNode::Allocator* _allocator;
+ CompletedList* _cbl;
+ volatile size_t* _total_allocations;
+ volatile bool* _continue_running;
+ size_t _allocations;
+
+public:
+ AllocatorThread(Semaphore* post,
+ BufferNode::Allocator* allocator,
+ CompletedList* cbl,
+ volatile size_t* total_allocations,
+ volatile bool* continue_running) :
+ JavaTestThread(post),
+ _allocator(allocator),
+ _cbl(cbl),
+ _total_allocations(total_allocations),
+ _continue_running(continue_running),
+ _allocations(0)
+ {}
+
+ virtual void main_run() {
+ while (OrderAccess::load_acquire(_continue_running)) {
+ BufferNode* node = _allocator->allocate();
+ _cbl->push(node);
+ ++_allocations;
+ ThreadBlockInVM tbiv(this); // Safepoint check.
+ }
+ tty->print_cr("allocations: " SIZE_FORMAT, _allocations);
+ Atomic::add(_allocations, _total_allocations);
+ }
+};
+
+// Simulate a GC thread, taking buffers from the completed buffer list
+// and returning them to the allocator.
+class BufferNode::TestSupport::ProcessorThread : public JavaTestThread {
+ BufferNode::Allocator* _allocator;
+ CompletedList* _cbl;
+ volatile bool* _continue_running;
+
+public:
+ ProcessorThread(Semaphore* post,
+ BufferNode::Allocator* allocator,
+ CompletedList* cbl,
+ volatile bool* continue_running) :
+ JavaTestThread(post),
+ _allocator(allocator),
+ _cbl(cbl),
+ _continue_running(continue_running)
+ {}
+
+ virtual void main_run() {
+ while (true) {
+ BufferNode* node = _cbl->pop();
+ if (node != NULL) {
+ _allocator->release(node);
+ } else if (!OrderAccess::load_acquire(_continue_running)) {
+ return;
+ }
+ ThreadBlockInVM tbiv(this); // Safepoint check.
+ }
+ }
+};
+
+static void run_test(BufferNode::Allocator* allocator, CompletedList* cbl) {
+ const uint nthreads = 4;
+ const uint milliseconds_to_run = 1000;
+
+ Semaphore post;
+ volatile size_t total_allocations = 0;
+ volatile bool allocator_running = true;
+ volatile bool processor_running = true;
+
+ ProcessorThread* proc_threads[nthreads] = {};
+ for (uint i = 0; i < nthreads; ++i) {
+ proc_threads[i] = new ProcessorThread(&post,
+ allocator,
+ cbl,
+ &processor_running);
+ proc_threads[i]->doit();
+ }
+
+ AllocatorThread* alloc_threads[nthreads] = {};
+ for (uint i = 0; i < nthreads; ++i) {
+ alloc_threads[i] = new AllocatorThread(&post,
+ allocator,
+ cbl,
+ &total_allocations,
+ &allocator_running);
+ alloc_threads[i]->doit();
+ }
+
+ JavaThread* this_thread = JavaThread::current();
+ tty->print_cr("Stressing allocator for %u ms", milliseconds_to_run);
+ {
+ ThreadInVMfromNative invm(this_thread);
+ os::sleep(this_thread, milliseconds_to_run, true);
+ }
+ OrderAccess::release_store(&allocator_running, false);
+ for (uint i = 0; i < nthreads; ++i) {
+ ThreadInVMfromNative invm(this_thread);
+ post.wait_with_safepoint_check(this_thread);
+ }
+ OrderAccess::release_store(&processor_running, false);
+ for (uint i = 0; i < nthreads; ++i) {
+ ThreadInVMfromNative invm(this_thread);
+ post.wait_with_safepoint_check(this_thread);
+ }
+ ASSERT_TRUE(BufferNode::TestSupport::try_transfer_pending(allocator));
+ tty->print_cr("total allocations: " SIZE_FORMAT, total_allocations);
+ tty->print_cr("allocator free count: " SIZE_FORMAT, allocator->free_count());
+}
+
+const size_t buffer_size = 1024;
+
+TEST_VM(PtrQueueBufferAllocatorTest, stress_free_list_allocator) {
+ BufferNode::Allocator allocator("Test Allocator", buffer_size);
+ CompletedList completed;
+ run_test(&allocator, &completed);
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/gtest/utilities/test_lockFreeStack.cpp Sat Jan 19 19:50:01 2019 -0500
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include "precompiled.hpp"
+#include "memory/allocation.inline.hpp"
+#include "runtime/atomic.hpp"
+#include "runtime/orderAccess.hpp"
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/lockFreeStack.hpp"
+#include "threadHelper.inline.hpp"
+#include "unittest.hpp"
+#include <new>
+
+class LockFreeStackTestElement {
+ typedef LockFreeStackTestElement Element;
+
+ Element* volatile _entry;
+ Element* volatile _entry1;
+ size_t _id;
+
+ static Element* volatile* entry_ptr(Element& e) { return &e._entry; }
+ static Element* volatile* entry1_ptr(Element& e) { return &e._entry1; }
+
+public:
+ LockFreeStackTestElement(size_t id = 0) : _entry(), _entry1(), _id(id) {}
+ size_t id() const { return _id; }
+ void set_id(size_t value) { _id = value; }
+
+ typedef LockFreeStack<Element, &entry_ptr> Stack;
+ typedef LockFreeStack<Element, &entry1_ptr> Stack1;
+};
+
+typedef LockFreeStackTestElement Element;
+typedef Element::Stack Stack;
+typedef Element::Stack1 Stack1;
+
+static void initialize_ids(Element* elements, size_t size) {
+ for (size_t i = 0; i < size; ++i) {
+ elements[i].set_id(i);
+ }
+}
+
+class LockFreeStackTestBasics : public ::testing::Test {
+public:
+ LockFreeStackTestBasics();
+
+ static const size_t nelements = 10;
+ Element elements[nelements];
+ Stack stack;
+
+private:
+ void initialize();
+};
+
+const size_t LockFreeStackTestBasics::nelements;
+
+LockFreeStackTestBasics::LockFreeStackTestBasics() : stack() {
+ initialize_ids(elements, nelements);
+ initialize();
+}
+
+void LockFreeStackTestBasics::initialize() {
+ ASSERT_TRUE(stack.empty());
+ ASSERT_EQ(0u, stack.length());
+ ASSERT_TRUE(stack.pop() == NULL);
+ ASSERT_TRUE(stack.top() == NULL);
+
+ for (size_t id = 0; id < nelements; ++id) {
+ ASSERT_EQ(id, stack.length());
+ Element* e = &elements[id];
+ ASSERT_EQ(id, e->id());
+ stack.push(*e);
+ ASSERT_FALSE(stack.empty());
+ ASSERT_EQ(e, stack.top());
+ }
+}
+
+TEST_F(LockFreeStackTestBasics, push_pop) {
+ for (size_t i = nelements; i > 0; ) {
+ ASSERT_FALSE(stack.empty());
+ ASSERT_EQ(i, stack.length());
+ --i;
+ Element* e = stack.pop();
+ ASSERT_TRUE(e != NULL);
+ ASSERT_EQ(&elements[i], e);
+ ASSERT_EQ(i, e->id());
+ }
+ ASSERT_TRUE(stack.empty());
+ ASSERT_EQ(0u, stack.length());
+ ASSERT_TRUE(stack.pop() == NULL);
+}
+
+TEST_F(LockFreeStackTestBasics, prepend_one) {
+ Stack other_stack;
+ ASSERT_TRUE(other_stack.empty());
+ ASSERT_TRUE(other_stack.pop() == NULL);
+ ASSERT_EQ(0u, other_stack.length());
+ ASSERT_TRUE(other_stack.top() == NULL);
+ ASSERT_TRUE(other_stack.pop() == NULL);
+
+ other_stack.prepend(*stack.pop_all());
+ ASSERT_EQ(nelements, other_stack.length());
+ ASSERT_TRUE(stack.empty());
+ ASSERT_EQ(0u, stack.length());
+ ASSERT_TRUE(stack.pop() == NULL);
+ ASSERT_TRUE(stack.top() == NULL);
+
+ for (size_t i = nelements; i > 0; ) {
+ ASSERT_EQ(i, other_stack.length());
+ --i;
+ Element* e = other_stack.pop();
+ ASSERT_TRUE(e != NULL);
+ ASSERT_EQ(&elements[i], e);
+ ASSERT_EQ(i, e->id());
+ }
+ ASSERT_EQ(0u, other_stack.length());
+ ASSERT_TRUE(other_stack.pop() == NULL);
+}
+
+TEST_F(LockFreeStackTestBasics, prepend_two) {
+ Stack other_stack;
+ ASSERT_TRUE(other_stack.empty());
+ ASSERT_EQ(0u, other_stack.length());
+ ASSERT_TRUE(other_stack.top() == NULL);
+ ASSERT_TRUE(other_stack.pop() == NULL);
+
+ Element* top = stack.pop_all();
+ ASSERT_EQ(top, &elements[nelements - 1]);
+ other_stack.prepend(*top, elements[0]);
+
+ for (size_t i = nelements; i > 0; ) {
+ ASSERT_EQ(i, other_stack.length());
+ --i;
+ Element* e = other_stack.pop();
+ ASSERT_TRUE(e != NULL);
+ ASSERT_EQ(&elements[i], e);
+ ASSERT_EQ(i, e->id());
+ }
+ ASSERT_EQ(0u, other_stack.length());
+ ASSERT_TRUE(other_stack.pop() == NULL);
+}
+
+TEST_F(LockFreeStackTestBasics, two_stacks) {
+ Stack1 stack1;
+ ASSERT_TRUE(stack1.pop() == NULL);
+
+ for (size_t id = 0; id < nelements; ++id) {
+ stack1.push(elements[id]);
+ }
+ ASSERT_EQ(nelements, stack1.length());
+ Element* e0 = stack.top();
+ Element* e1 = stack1.top();
+ while (true) {
+ ASSERT_EQ(e0, e1);
+ if (e0 == NULL) break;
+ e0 = stack.next(*e0);
+ e1 = stack1.next(*e1);
+ }
+
+ for (size_t i = nelements; i > 0; ) {
+ ASSERT_EQ(i, stack.length());
+ ASSERT_EQ(i, stack1.length());
+ --i;
+ Element* e = stack.pop();
+ ASSERT_TRUE(e != NULL);
+ ASSERT_EQ(&elements[i], e);
+ ASSERT_EQ(i, e->id());
+
+ Element* e1 = stack1.pop();
+ ASSERT_TRUE(e1 != NULL);
+ ASSERT_EQ(&elements[i], e1);
+ ASSERT_EQ(i, e1->id());
+
+ ASSERT_EQ(e, e1);
+ }
+ ASSERT_EQ(0u, stack.length());
+ ASSERT_EQ(0u, stack1.length());
+ ASSERT_TRUE(stack.pop() == NULL);
+ ASSERT_TRUE(stack1.pop() == NULL);
+}
+
+class LockFreeStackTestThread : public JavaTestThread {
+ uint _id;
+ Stack* _from;
+ Stack* _to;
+ volatile size_t* _processed;
+ size_t _process_limit;
+ size_t _local_processed;
+ volatile bool _ready;
+
+public:
+ LockFreeStackTestThread(Semaphore* post,
+ uint id,
+ Stack* from,
+ Stack* to,
+ volatile size_t* processed,
+ size_t process_limit) :
+ JavaTestThread(post),
+ _id(id),
+ _from(from),
+ _to(to),
+ _processed(processed),
+ _process_limit(process_limit),
+ _local_processed(0),
+ _ready(false)
+ {}
+
+ virtual void main_run() {
+ OrderAccess::release_store_fence(&_ready, true);
+ while (true) {
+ Element* e = _from->pop();
+ if (e != NULL) {
+ _to->push(*e);
+ Atomic::inc(_processed);
+ ++_local_processed;
+ } else if (OrderAccess::load_acquire(_processed) == _process_limit) {
+ tty->print_cr("thread %u processed " SIZE_FORMAT, _id, _local_processed);
+ return;
+ }
+ }
+ }
+
+ bool ready() const { return OrderAccess::load_acquire(&_ready); }
+};
+
+TEST_VM(LockFreeStackTest, stress) {
+ Semaphore post;
+ Stack initial_stack;
+ Stack start_stack;
+ Stack middle_stack;
+ Stack final_stack;
+ volatile size_t stage1_processed = 0;
+ volatile size_t stage2_processed = 0;
+
+ const size_t nelements = 10000;
+ Element* elements = NEW_C_HEAP_ARRAY(Element, nelements, mtOther);
+ for (size_t id = 0; id < nelements; ++id) {
+ ::new (&elements[id]) Element(id);
+ initial_stack.push(elements[id]);
+ }
+ ASSERT_EQ(nelements, initial_stack.length());
+
+ // - stage1 threads pop from start_stack and push to middle_stack.
+ // - stage2 threads pop from middle_stack and push to final_stack.
+ // - all threads in a stage count the number of elements processed in
+ // their corresponding stageN_processed counter.
+
+ const uint stage1_threads = 2;
+ const uint stage2_threads = 2;
+ const uint nthreads = stage1_threads + stage2_threads;
+ LockFreeStackTestThread* threads[nthreads] = {};
+
+ for (uint i = 0; i < ARRAY_SIZE(threads); ++i) {
+ Stack* from = &start_stack;
+ Stack* to = &middle_stack;
+ volatile size_t* processed = &stage1_processed;
+ if (i >= stage1_threads) {
+ from = &middle_stack;
+ to = &final_stack;
+ processed = &stage2_processed;
+ }
+ threads[i] =
+ new LockFreeStackTestThread(&post, i, from, to, processed, nelements);
+ threads[i]->doit();
+ while (!threads[i]->ready()) {} // Wait until ready to start test.
+ }
+
+ // Transfer elements to start_stack to start test.
+ start_stack.prepend(*initial_stack.pop_all());
+
+ // Wait for all threads to complete.
+ for (uint i = 0; i < nthreads; ++i) {
+ post.wait();
+ }
+
+ // Verify expected state.
+ ASSERT_EQ(nelements, stage1_processed);
+ ASSERT_EQ(nelements, stage2_processed);
+ ASSERT_EQ(0u, initial_stack.length());
+ ASSERT_EQ(0u, start_stack.length());
+ ASSERT_EQ(0u, middle_stack.length());
+ ASSERT_EQ(nelements, final_stack.length());
+ while (final_stack.pop() != NULL) {}
+
+ FREE_C_HEAP_ARRAY(Element, elements);
+}