8196083: Avoid locking in OopStorage::release
Summary: Defer release list updates to later allocate/delete operations.
Reviewed-by: coleenp, eosterlund
--- a/src/hotspot/share/gc/shared/oopStorage.cpp Thu Feb 08 15:21:59 2018 -0500
+++ b/src/hotspot/share/gc/shared/oopStorage.cpp Thu Feb 08 17:23:43 2018 -0500
@@ -26,7 +26,9 @@
#include "gc/shared/oopStorage.inline.hpp"
#include "gc/shared/oopStorageParState.inline.hpp"
#include "logging/log.hpp"
+#include "logging/logStream.hpp"
#include "memory/allocation.inline.hpp"
+#include "memory/resourceArea.hpp"
#include "runtime/atomic.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/mutex.hpp"
@@ -107,7 +109,7 @@
}
// Blocks start with an array of BitsPerWord oop entries. That array
-// is divided into conceptual BytesPerWord sections of BitsPerWord
+// is divided into conceptual BytesPerWord sections of BitsPerByte
// entries. Blocks are allocated aligned on section boundaries, for
// the convenience of mapping from an entry to the containing block;
// see block_for_ptr(). Aligning on section boundary rather than on
@@ -130,7 +132,9 @@
_owner(owner),
_memory(memory),
_active_entry(),
- _allocate_entry()
+ _allocate_entry(),
+ _deferred_updates_next(NULL),
+ _release_refcount(0)
{
STATIC_ASSERT(_data_pos == 0);
STATIC_ASSERT(section_size * section_count == ARRAY_SIZE(_data));
@@ -143,6 +147,8 @@
#endif
OopStorage::Block::~Block() {
+ assert(_release_refcount == 0, "deleting block while releasing");
+ assert(_deferred_updates_next == NULL, "deleting block with deferred update");
// Clear fields used by block_for_ptr and entry validation, which
// might help catch bugs. Volatile to prevent dead-store elimination.
const_cast<uintx volatile&>(_allocated_bitmask) = 0;
@@ -182,8 +188,24 @@
return bitmask_for_index(get_index(ptr));
}
-uintx OopStorage::Block::cmpxchg_allocated_bitmask(uintx new_value, uintx compare_value) {
- return Atomic::cmpxchg(new_value, &_allocated_bitmask, compare_value);
+// A block is deletable if
+// (1) It is empty.
+// (2) There is not a release() operation currently operating on it.
+// (3) It is not in the deferred updates list.
+// The order of tests is important for proper interaction between release()
+// and concurrent deletion.
+bool OopStorage::Block::is_deletable() const {
+ return (OrderAccess::load_acquire(&_allocated_bitmask) == 0) &&
+ (OrderAccess::load_acquire(&_release_refcount) == 0) &&
+ (OrderAccess::load_acquire(&_deferred_updates_next) == NULL);
+}
+
+OopStorage::Block* OopStorage::Block::deferred_updates_next() const {
+ return _deferred_updates_next;
+}
+
+void OopStorage::Block::set_deferred_updates_next(Block* block) {
+ _deferred_updates_next = block;
}
bool OopStorage::Block::contains(const oop* ptr) const {
@@ -203,7 +225,7 @@
assert(!is_full_bitmask(allocated), "attempt to allocate from full block");
unsigned index = count_trailing_zeros(~allocated);
uintx new_value = allocated | bitmask_for_index(index);
- uintx fetched = cmpxchg_allocated_bitmask(new_value, allocated);
+ uintx fetched = Atomic::cmpxchg(new_value, &_allocated_bitmask, allocated);
if (fetched == allocated) {
return get_pointer(index); // CAS succeeded; return entry for index.
}
@@ -261,20 +283,6 @@
return NULL;
}
-bool OopStorage::is_valid_block_locked_or_safepoint(const Block* check_block) const {
- assert_locked_or_safepoint(_allocate_mutex);
- // For now, simple linear search. Do something more clever if this
- // is a performance bottleneck, particularly for allocation_status.
- for (const Block* block = _active_list.chead();
- block != NULL;
- block = _active_list.next(*block)) {
- if (check_block == block) {
- return true;
- }
- }
- return false;
-}
-
#ifdef ASSERT
void OopStorage::assert_at_safepoint() {
assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
@@ -291,39 +299,49 @@
// kept at the end of the _allocate_list, to make it easy for empty block
// deletion to find them.
//
-// allocate(), release(), and delete_empty_blocks_concurrent() all lock the
+// allocate(), and delete_empty_blocks_concurrent() lock the
// _allocate_mutex while performing any list modifications.
//
// allocate() and release() update a block's _allocated_bitmask using CAS
-// loops. This prevents loss of updates even though release() may perform
-// some updates without any locking.
+// loops. This prevents loss of updates even though release() performs
+// its updates without any locking.
//
// allocate() obtains the entry from the first block in the _allocate_list,
// and updates that block's _allocated_bitmask to indicate the entry is in
// use. If this makes the block full (all entries in use), the block is
// removed from the _allocate_list so it won't be considered by future
-// allocations until some entries in it are relased.
+// allocations until some entries in it are released.
//
-// release() looks up the block for the entry without locking. Once the block
-// has been determined, its _allocated_bitmask needs to be updated, and its
-// position in the _allocate_list may need to be updated. There are two
-// cases:
+// release() is performed lock-free. release() first looks up the block for
+// the entry, using address alignment to find the enclosing block (thereby
+// avoiding iteration over the _active_list). Once the block has been
+// determined, its _allocated_bitmask needs to be updated, and its position in
+// the _allocate_list may need to be updated. There are two cases:
//
// (a) If the block is neither full nor would become empty with the release of
// the entry, only its _allocated_bitmask needs to be updated. But if the CAS
// update fails, the applicable case may change for the retry.
//
-// (b) Otherwise, the _allocate_list will also need to be modified. This
-// requires locking the _allocate_mutex, and then attempting to CAS the
-// _allocated_bitmask. If the CAS fails, the applicable case may change for
-// the retry. If the CAS succeeds, then update the _allocate_list according
-// to the the state changes. If the block changed from full to not full, then
-// it needs to be added to the _allocate_list, for use in future allocations.
-// If the block changed from not empty to empty, then it is moved to the end
-// of the _allocate_list, for ease of empty block deletion processing.
+// (b) Otherwise, the _allocate_list also needs to be modified. This requires
+// locking the _allocate_mutex. To keep the release() operation lock-free,
+// rather than updating the _allocate_list itself, it instead performs a
+// lock-free push of the block onto the _deferred_updates list. Entries on
+// that list are processed by allocate() and delete_empty_blocks_XXX(), while
+// they already hold the necessary lock. That processing makes the block's
+// list state consistent with its current _allocated_bitmask. The block is
+// added to the _allocate_list if not already present and the bitmask is not
+// full. The block is moved to the end of the _allocated_list if the bitmask
+// is empty, for ease of empty block deletion processing.
oop* OopStorage::allocate() {
MutexLockerEx ml(_allocate_mutex, Mutex::_no_safepoint_check_flag);
+ // Do some deferred update processing every time we allocate.
+ // Continue processing deferred updates if _allocate_list is empty,
+ // in the hope that we'll get a block from that, rather than
+ // allocating a new block.
+ while (reduce_deferred_updates() && (_allocate_list.head() == NULL)) {}
+
+ // Use the first block in _allocate_list for the allocation.
Block* block = _allocate_list.head();
if (block == NULL) {
// No available blocks; make a new one, and add to storage.
@@ -331,7 +349,17 @@
MutexUnlockerEx mul(_allocate_mutex, Mutex::_no_safepoint_check_flag);
block = Block::new_block(this);
}
- if (block != NULL) {
+ if (block == NULL) {
+ while (_allocate_list.head() == NULL) {
+ if (!reduce_deferred_updates()) {
+ // Failed to make new block, no other thread made a block
+ // available while the mutex was released, and didn't get
+ // one from a deferred update either, so return failure.
+ log_info(oopstorage, ref)("%s: failed allocation", name());
+ return NULL;
+ }
+ }
+ } else {
// Add new block to storage.
log_info(oopstorage, blocks)("%s: new block " PTR_FORMAT, name(), p2i(block));
@@ -340,22 +368,14 @@
// to allocate from non-empty blocks, to allow empty blocks to
// be deleted.
_allocate_list.push_back(*block);
- ++_empty_block_count;
// Add to front of _active_list, and then record as the head
// block, for concurrent iteration protocol.
_active_list.push_front(*block);
++_block_count;
// Ensure all setup of block is complete before making it visible.
OrderAccess::release_store(&_active_head, block);
- } else {
- log_info(oopstorage, blocks)("%s: failed new block allocation", name());
}
block = _allocate_list.head();
- if (block == NULL) {
- // Failed to make new block, and no other thread made a block
- // available while the mutex was released, so return failure.
- return NULL;
- }
}
// Allocate from first block.
assert(block != NULL, "invariant");
@@ -363,7 +383,6 @@
if (block->is_empty()) {
// Transitioning from empty to not empty.
log_debug(oopstorage, blocks)("%s: block not empty " PTR_FORMAT, name(), p2i(block));
- --_empty_block_count;
}
oop* result = block->allocate();
assert(result != NULL, "allocation failed");
@@ -384,72 +403,115 @@
return Block::block_for_ptr(this, ptr);
}
-void OopStorage::release_from_block(Block& block, uintx releasing) {
- assert(releasing != 0, "invariant");
- uintx allocated = block.allocated_bitmask();
- while (true) {
- assert(releasing == (allocated & releasing), "invariant");
- uintx new_value = allocated ^ releasing;
- // CAS new_value into block's allocated bitmask, retrying with
- // updated allocated bitmask until the CAS succeeds.
- uintx fetched;
- if (!is_full_bitmask(allocated) && !is_empty_bitmask(new_value)) {
- fetched = block.cmpxchg_allocated_bitmask(new_value, allocated);
- if (fetched == allocated) return;
- } else {
- // Need special handling if transitioning from full to not full,
- // or from not empty to empty. For those cases, must hold the
- // _allocation_mutex when updating the allocated bitmask, to
- // ensure the associated list manipulations will be consistent
- // with the allocation bitmask that is visible to other threads
- // in allocate() or deleting empty blocks.
- MutexLockerEx ml(_allocate_mutex, Mutex::_no_safepoint_check_flag);
- fetched = block.cmpxchg_allocated_bitmask(new_value, allocated);
- if (fetched == allocated) {
- // CAS succeeded; handle special cases, which might no longer apply.
- if (is_full_bitmask(allocated)) {
- // Transitioning from full to not-full; add to _allocate_list.
- log_debug(oopstorage, blocks)("%s: block not full " PTR_FORMAT, name(), p2i(&block));
- _allocate_list.push_front(block);
- assert(!block.is_full(), "invariant"); // Still not full.
- }
- if (is_empty_bitmask(new_value)) {
- // Transitioning from not-empty to empty; move to end of
- // _allocate_list, to make it a deletion candidate.
- log_debug(oopstorage, blocks)("%s: block empty " PTR_FORMAT, name(), p2i(&block));
- _allocate_list.unlink(block);
- _allocate_list.push_back(block);
- ++_empty_block_count;
- assert(block.is_empty(), "invariant"); // Still empty.
- }
- return; // Successful CAS and transitions handled.
- }
- }
- // CAS failed; retry with latest value.
- allocated = fetched;
+static void log_release_transitions(uintx releasing,
+ uintx old_allocated,
+ const OopStorage* owner,
+ const void* block) {
+ ResourceMark rm;
+ Log(oopstorage, blocks) log;
+ LogStream ls(log.debug());
+ if (is_full_bitmask(old_allocated)) {
+ ls.print_cr("%s: block not full " PTR_FORMAT, owner->name(), p2i(block));
+ }
+ if (releasing == old_allocated) {
+ ls.print_cr("%s: block empty " PTR_FORMAT, owner->name(), p2i(block));
}
}
-#ifdef ASSERT
-void OopStorage::check_release(const Block* block, const oop* ptr) const {
- switch (allocation_status_validating_block(block, ptr)) {
- case INVALID_ENTRY:
- fatal("Releasing invalid entry: " PTR_FORMAT, p2i(ptr));
- break;
+void OopStorage::Block::release_entries(uintx releasing, Block* volatile* deferred_list) {
+ assert(releasing != 0, "preconditon");
+ // Prevent empty block deletion when transitioning to empty.
+ Atomic::inc(&_release_refcount);
+
+ // Atomically update allocated bitmask.
+ uintx old_allocated = _allocated_bitmask;
+ while (true) {
+ assert((releasing & ~old_allocated) == 0, "releasing unallocated entries");
+ uintx new_value = old_allocated ^ releasing;
+ uintx fetched = Atomic::cmpxchg(new_value, &_allocated_bitmask, old_allocated);
+ if (fetched == old_allocated) break; // Successful update.
+ old_allocated = fetched; // Retry with updated bitmask.
+ }
- case UNALLOCATED_ENTRY:
- fatal("Releasing unallocated entry: " PTR_FORMAT, p2i(ptr));
- break;
+ // Now that the bitmask has been updated, if we have a state transition
+ // (updated bitmask is empty or old bitmask was full), atomically push
+ // this block onto the deferred updates list. Some future call to
+ // reduce_deferred_updates will make any needed changes related to this
+ // block and _allocate_list. This deferral avoids list updates and the
+ // associated locking here.
+ if ((releasing == old_allocated) || is_full_bitmask(old_allocated)) {
+ // Log transitions. Both transitions are possible in a single update.
+ if (log_is_enabled(Debug, oopstorage, blocks)) {
+ log_release_transitions(releasing, old_allocated, _owner, this);
+ }
+ // Attempt to claim responsibility for adding this block to the deferred
+ // list, by setting the link to non-NULL by self-looping. If this fails,
+ // then someone else has made such a claim and the deferred update has not
+ // yet been processed and will include our change, so we don't need to do
+ // anything further.
+ if (Atomic::replace_if_null(this, &_deferred_updates_next)) {
+ // Successfully claimed. Push, with self-loop for end-of-list.
+ Block* head = *deferred_list;
+ while (true) {
+ _deferred_updates_next = (head == NULL) ? this : head;
+ Block* fetched = Atomic::cmpxchg(this, deferred_list, head);
+ if (fetched == head) break; // Successful update.
+ head = fetched; // Retry with updated head.
+ }
+ log_debug(oopstorage, blocks)("%s: deferred update " PTR_FORMAT,
+ _owner->name(), p2i(this));
+ }
+ }
+ // Release hold on empty block deletion.
+ Atomic::dec(&_release_refcount);
+}
- case ALLOCATED_ENTRY:
- assert(block->contains(ptr), "invariant");
- break;
+// Process one available deferred update. Returns true if one was processed.
+bool OopStorage::reduce_deferred_updates() {
+ assert_locked_or_safepoint(_allocate_mutex);
+ // Atomically pop a block off the list, if any available.
+ // No ABA issue because this is only called by one thread at a time.
+ // The atomicity is wrto pushes by release().
+ Block* block = OrderAccess::load_acquire(&_deferred_updates);
+ while (true) {
+ if (block == NULL) return false;
+ // Try atomic pop of block from list.
+ Block* tail = block->deferred_updates_next();
+ if (block == tail) tail = NULL; // Handle self-loop end marker.
+ Block* fetched = Atomic::cmpxchg(tail, &_deferred_updates, block);
+ if (fetched == block) break; // Update successful.
+ block = fetched; // Retry with updated block.
+ }
+ block->set_deferred_updates_next(NULL); // Clear tail after updating head.
+ // Ensure bitmask read after pop is complete, including clearing tail, for
+ // ordering with release(). Without this, we may be processing a stale
+ // bitmask state here while blocking a release() operation from recording
+ // the deferred update needed for its bitmask change.
+ OrderAccess::storeload();
+ // Process popped block.
+ uintx allocated = block->allocated_bitmask();
- default:
- ShouldNotReachHere();
+ // Make membership in list consistent with bitmask state.
+ if ((_allocate_list.ctail() != NULL) &&
+ ((_allocate_list.ctail() == block) ||
+ (_allocate_list.next(*block) != NULL))) {
+ // Block is in the allocate list.
+ assert(!is_full_bitmask(allocated), "invariant");
+ } else if (!is_full_bitmask(allocated)) {
+ // Block is not in the allocate list, but now should be.
+ _allocate_list.push_front(*block);
+ } // Else block is full and not in list, which is correct.
+
+ // Move empty block to end of list, for possible deletion.
+ if (is_empty_bitmask(allocated)) {
+ _allocate_list.unlink(*block);
+ _allocate_list.push_back(*block);
}
+
+ log_debug(oopstorage, blocks)("%s: processed deferred update " PTR_FORMAT,
+ name(), p2i(block));
+ return true; // Processed one pending update.
}
-#endif // ASSERT
inline void check_release_entry(const oop* entry) {
assert(entry != NULL, "Releasing NULL");
@@ -459,9 +521,9 @@
void OopStorage::release(const oop* ptr) {
check_release_entry(ptr);
Block* block = find_block_or_null(ptr);
- check_release(block, ptr);
+ assert(block != NULL, "%s: invalid release " PTR_FORMAT, name(), p2i(ptr));
log_info(oopstorage, ref)("%s: released " PTR_FORMAT, name(), p2i(ptr));
- release_from_block(*block, block->bitmask_for_entry(ptr));
+ block->release_entries(block->bitmask_for_entry(ptr), &_deferred_updates);
Atomic::dec(&_allocation_count);
}
@@ -470,15 +532,15 @@
while (i < size) {
check_release_entry(ptrs[i]);
Block* block = find_block_or_null(ptrs[i]);
- check_release(block, ptrs[i]);
+ assert(block != NULL, "%s: invalid release " PTR_FORMAT, name(), p2i(ptrs[i]));
log_info(oopstorage, ref)("%s: released " PTR_FORMAT, name(), p2i(ptrs[i]));
size_t count = 0;
uintx releasing = 0;
for ( ; i < size; ++i) {
const oop* entry = ptrs[i];
+ check_release_entry(entry);
// If entry not in block, finish block and resume outer loop with entry.
if (!block->contains(entry)) break;
- check_release_entry(entry);
// Add entry to releasing bitmap.
log_info(oopstorage, ref)("%s: released " PTR_FORMAT, name(), p2i(entry));
uintx entry_bitmask = block->bitmask_for_entry(entry);
@@ -488,7 +550,7 @@
++count;
}
// Release the contiguous entries that are in block.
- release_from_block(*block, releasing);
+ block->release_entries(releasing, &_deferred_updates);
Atomic::sub(count, &_allocation_count);
}
}
@@ -506,11 +568,11 @@
_active_list(&Block::get_active_entry),
_allocate_list(&Block::get_allocate_entry),
_active_head(NULL),
+ _deferred_updates(NULL),
_allocate_mutex(allocate_mutex),
_active_mutex(active_mutex),
_allocation_count(0),
_block_count(0),
- _empty_block_count(0),
_concurrent_iteration_active(false)
{
assert(_active_mutex->rank() < _allocate_mutex->rank(),
@@ -529,6 +591,10 @@
OopStorage::~OopStorage() {
Block* block;
+ while ((block = _deferred_updates) != NULL) {
+ _deferred_updates = block->deferred_updates_next();
+ block->set_deferred_updates_next(NULL);
+ }
while ((block = _allocate_list.head()) != NULL) {
_allocate_list.unlink(*block);
}
@@ -539,43 +605,47 @@
FREE_C_HEAP_ARRAY(char, _name);
}
-void OopStorage::delete_empty_blocks_safepoint(size_t retain) {
+void OopStorage::delete_empty_blocks_safepoint() {
assert_at_safepoint();
+ // Process any pending release updates, which may make more empty
+ // blocks available for deletion.
+ while (reduce_deferred_updates()) {}
// Don't interfere with a concurrent iteration.
if (_concurrent_iteration_active) return;
- // Compute the number of blocks to remove, to minimize volatile accesses.
- size_t empty_blocks = _empty_block_count;
- if (retain < empty_blocks) {
- size_t remove_count = empty_blocks - retain;
- // Update volatile counters once.
- _block_count -= remove_count;
- _empty_block_count -= remove_count;
- do {
- const Block* block = _allocate_list.ctail();
- assert(block != NULL, "invariant");
- assert(block->is_empty(), "invariant");
- // Remove block from lists, and delete it.
- _active_list.unlink(*block);
- _allocate_list.unlink(*block);
- delete_empty_block(*block);
- } while (--remove_count > 0);
- // Update _active_head, in case current value was in deleted set.
- _active_head = _active_list.head();
+ // Delete empty (and otherwise deletable) blocks from end of _allocate_list.
+ for (const Block* block = _allocate_list.ctail();
+ (block != NULL) && block->is_deletable();
+ block = _allocate_list.ctail()) {
+ _active_list.unlink(*block);
+ _allocate_list.unlink(*block);
+ delete_empty_block(*block);
+ --_block_count;
}
+ // Update _active_head, in case current value was in deleted set.
+ _active_head = _active_list.head();
}
-void OopStorage::delete_empty_blocks_concurrent(size_t retain) {
+void OopStorage::delete_empty_blocks_concurrent() {
MutexLockerEx ml(_allocate_mutex, Mutex::_no_safepoint_check_flag);
// Other threads could be adding to the empty block count while we
// release the mutex across the block deletions. Set an upper bound
// on how many blocks we'll try to release, so other threads can't
// cause an unbounded stay in this function.
- if (_empty_block_count <= retain) return;
- size_t limit = _empty_block_count - retain;
- for (size_t i = 0; (i < limit) && (retain < _empty_block_count); ++i) {
+ size_t limit = _block_count;
+
+ for (size_t i = 0; i < limit; ++i) {
+ // Additional updates might become available while we dropped the
+ // lock. But limit number processed to limit lock duration.
+ reduce_deferred_updates();
+
const Block* block = _allocate_list.ctail();
- assert(block != NULL, "invariant");
- assert(block->is_empty(), "invariant");
+ if ((block == NULL) || !block->is_deletable()) {
+ // No block to delete, so done. There could be more pending
+ // deferred updates that could give us more work to do; deal with
+ // that in some later call, to limit lock duration here.
+ return;
+ }
+
{
MutexLockerEx aml(_active_mutex, Mutex::_no_safepoint_check_flag);
// Don't interfere with a concurrent iteration.
@@ -589,28 +659,31 @@
}
// Remove block from _allocate_list and delete it.
_allocate_list.unlink(*block);
- --_empty_block_count;
// Release mutex while deleting block.
MutexUnlockerEx ul(_allocate_mutex, Mutex::_no_safepoint_check_flag);
delete_empty_block(*block);
}
}
-OopStorage::EntryStatus
-OopStorage::allocation_status_validating_block(const Block* block,
- const oop* ptr) const {
- MutexLockerEx ml(_allocate_mutex, Mutex::_no_safepoint_check_flag);
- if ((block == NULL) || !is_valid_block_locked_or_safepoint(block)) {
- return INVALID_ENTRY;
- } else if ((block->allocated_bitmask() & block->bitmask_for_entry(ptr)) != 0) {
- return ALLOCATED_ENTRY;
- } else {
- return UNALLOCATED_ENTRY;
+OopStorage::EntryStatus OopStorage::allocation_status(const oop* ptr) const {
+ const Block* block = find_block_or_null(ptr);
+ if (block != NULL) {
+ // Verify block is a real block. For now, simple linear search.
+ // Do something more clever if this is a performance bottleneck.
+ MutexLockerEx ml(_allocate_mutex, Mutex::_no_safepoint_check_flag);
+ for (const Block* check_block = _active_list.chead();
+ check_block != NULL;
+ check_block = _active_list.next(*check_block)) {
+ if (check_block == block) {
+ if ((block->allocated_bitmask() & block->bitmask_for_entry(ptr)) != 0) {
+ return ALLOCATED_ENTRY;
+ } else {
+ return UNALLOCATED_ENTRY;
+ }
+ }
+ }
}
-}
-
-OopStorage::EntryStatus OopStorage::allocation_status(const oop* ptr) const {
- return allocation_status_validating_block(find_block_or_null(ptr), ptr);
+ return INVALID_ENTRY;
}
size_t OopStorage::allocation_count() const {
@@ -621,10 +694,6 @@
return _block_count;
}
-size_t OopStorage::empty_block_count() const {
- return _empty_block_count;
-}
-
size_t OopStorage::total_memory_usage() const {
size_t total_size = sizeof(OopStorage);
total_size += strlen(name()) + 1;
@@ -690,17 +759,12 @@
void OopStorage::print_on(outputStream* st) const {
size_t allocations = _allocation_count;
size_t blocks = _block_count;
- size_t empties = _empty_block_count;
- // Comparison is being careful about racy accesses.
- size_t used = (blocks < empties) ? 0 : (blocks - empties);
double data_size = section_size * section_count;
- double alloc_percentage = percent_of((double)allocations, used * data_size);
+ double alloc_percentage = percent_of((double)allocations, blocks * data_size);
- st->print("%s: " SIZE_FORMAT " entries in " SIZE_FORMAT " blocks (%.F%%), "
- SIZE_FORMAT " empties, " SIZE_FORMAT " bytes",
- name(), allocations, used, alloc_percentage,
- empties, total_memory_usage());
+ st->print("%s: " SIZE_FORMAT " entries in " SIZE_FORMAT " blocks (%.F%%), " SIZE_FORMAT " bytes",
+ name(), allocations, blocks, alloc_percentage, total_memory_usage());
if (_concurrent_iteration_active) {
st->print(", concurrent iteration active");
}
--- a/src/hotspot/share/gc/shared/oopStorage.hpp Thu Feb 08 15:21:59 2018 -0500
+++ b/src/hotspot/share/gc/shared/oopStorage.hpp Thu Feb 08 17:23:43 2018 -0500
@@ -84,10 +84,6 @@
// The number of blocks of entries. Useful for sizing parallel iteration.
size_t block_count() const;
- // The number of blocks with no allocated entries. Useful for sizing
- // parallel iteration and scheduling block deletion.
- size_t empty_block_count() const;
-
// Total number of blocks * memory allocation per block, plus
// bookkeeping overhead, including this storage object.
size_t total_memory_usage() const;
@@ -107,14 +103,13 @@
// postcondition: *result == NULL.
oop* allocate();
- // Deallocates ptr, after setting its value to NULL. Locks _allocate_mutex.
+ // Deallocates ptr. No locking.
// precondition: ptr is a valid allocated entry.
// precondition: *ptr == NULL.
void release(const oop* ptr);
// Releases all the ptrs. Possibly faster than individual calls to
- // release(oop*). Best if ptrs is sorted by address. Locks
- // _allocate_mutex.
+ // release(oop*). Best if ptrs is sorted by address. No locking.
// precondition: All elements of ptrs are valid allocated entries.
// precondition: *ptrs[i] == NULL, for i in [0,size).
void release(const oop* const* ptrs, size_t size);
@@ -160,8 +155,8 @@
// Block cleanup functions are for the exclusive use of the GC.
// Both stop deleting if there is an in-progress concurrent iteration.
// Concurrent deletion locks both the allocate_mutex and the active_mutex.
- void delete_empty_blocks_safepoint(size_t retain = 1);
- void delete_empty_blocks_concurrent(size_t retain = 1);
+ void delete_empty_blocks_safepoint();
+ void delete_empty_blocks_concurrent();
// Debugging and logging support.
const char* name() const;
@@ -231,6 +226,7 @@
BlockList _active_list;
BlockList _allocate_list;
Block* volatile _active_head;
+ Block* volatile _deferred_updates;
Mutex* _allocate_mutex;
Mutex* _active_mutex;
@@ -238,16 +234,12 @@
// Counts are volatile for racy unlocked accesses.
volatile size_t _allocation_count;
volatile size_t _block_count;
- volatile size_t _empty_block_count;
// mutable because this gets set even for const iteration.
mutable bool _concurrent_iteration_active;
Block* find_block_or_null(const oop* ptr) const;
- bool is_valid_block_locked_or_safepoint(const Block* block) const;
- EntryStatus allocation_status_validating_block(const Block* block, const oop* ptr) const;
- void check_release(const Block* block, const oop* ptr) const NOT_DEBUG_RETURN;
- void release_from_block(Block& block, uintx release_bitmask);
void delete_empty_block(const Block& block);
+ bool reduce_deferred_updates();
static void assert_at_safepoint() NOT_DEBUG_RETURN;
--- a/src/hotspot/share/gc/shared/oopStorage.inline.hpp Thu Feb 08 15:21:59 2018 -0500
+++ b/src/hotspot/share/gc/shared/oopStorage.inline.hpp Thu Feb 08 17:23:43 2018 -0500
@@ -44,6 +44,8 @@
void* _memory; // Unaligned storage containing block.
BlockEntry _active_entry;
BlockEntry _allocate_entry;
+ Block* volatile _deferred_updates_next;
+ volatile uintx _release_refcount;
Block(const OopStorage* owner, void* memory);
~Block();
@@ -75,7 +77,10 @@
bool is_full() const;
bool is_empty() const;
uintx allocated_bitmask() const;
- uintx cmpxchg_allocated_bitmask(uintx new_value, uintx compare_value);
+ bool is_deletable() const;
+
+ Block* deferred_updates_next() const;
+ void set_deferred_updates_next(Block* new_next);
bool contains(const oop* ptr) const;
@@ -86,6 +91,8 @@
static Block* new_block(const OopStorage* owner);
static void delete_block(const Block& block);
+ void release_entries(uintx releasing, Block* volatile* deferred_list);
+
template<typename F> bool iterate(F f);
template<typename F> bool iterate(F f) const;
}; // class Block
--- a/src/hotspot/share/runtime/mutexLocker.cpp Thu Feb 08 15:21:59 2018 -0500
+++ b/src/hotspot/share/runtime/mutexLocker.cpp Thu Feb 08 17:23:43 2018 -0500
@@ -253,10 +253,10 @@
// of some places which hold other locks while releasing a handle, including
// the Patching_lock, which is of "special" rank. As a temporary workaround,
// lower the JNI oopstorage lock ranks to make them super-special.
- def(JNIGlobalAlloc_lock , PaddedMutex , special-1, true, Monitor::_safepoint_check_never);
- def(JNIGlobalActive_lock , PaddedMutex , special-2, true, Monitor::_safepoint_check_never);
- def(JNIWeakAlloc_lock , PaddedMutex , special-1, true, Monitor::_safepoint_check_never);
- def(JNIWeakActive_lock , PaddedMutex , special-2, true, Monitor::_safepoint_check_never);
+ def(JNIGlobalAlloc_lock , PaddedMutex , nonleaf, true, Monitor::_safepoint_check_never);
+ def(JNIGlobalActive_lock , PaddedMutex , nonleaf-1, true, Monitor::_safepoint_check_never);
+ def(JNIWeakAlloc_lock , PaddedMutex , nonleaf, true, Monitor::_safepoint_check_never);
+ def(JNIWeakActive_lock , PaddedMutex , nonleaf-1, true, Monitor::_safepoint_check_never);
def(JNICritical_lock , PaddedMonitor, nonleaf, true, Monitor::_safepoint_check_always); // used for JNI critical regions
def(AdapterHandlerLibrary_lock , PaddedMutex , nonleaf, true, Monitor::_safepoint_check_always);
--- a/test/hotspot/gtest/gc/shared/test_oopStorage.cpp Thu Feb 08 15:21:59 2018 -0500
+++ b/test/hotspot/gtest/gc/shared/test_oopStorage.cpp Thu Feb 08 17:23:43 2018 -0500
@@ -70,6 +70,10 @@
return storage._allocate_mutex;
}
+ static bool reduce_deferred_updates(OopStorage& storage) {
+ return storage.reduce_deferred_updates();
+ }
+
static bool block_is_empty(const Block& block) {
return block.is_empty();
}
@@ -127,9 +131,31 @@
return list.chead() == NULL;
}
-static void release_entry(OopStorage& storage, oop* entry) {
+static bool process_deferred_updates(OopStorage& storage) {
+ MutexLockerEx ml(TestAccess::allocate_mutex(storage), Mutex::_no_safepoint_check_flag);
+ bool result = false;
+ while (TestAccess::reduce_deferred_updates(storage)) {
+ result = true;
+ }
+ return result;
+}
+
+static void release_entry(OopStorage& storage, oop* entry, bool process_deferred = true) {
*entry = NULL;
storage.release(entry);
+ if (process_deferred) {
+ process_deferred_updates(storage);
+ }
+}
+
+static size_t empty_block_count(const OopStorage& storage) {
+ const TestAccess::BlockList& list = TestAccess::allocate_list(storage);
+ size_t count = 0;
+ for (const OopBlock* block = list.ctail();
+ (block != NULL) && block->is_empty();
+ ++count, block = list.prev(*block))
+ {}
+ return count;
}
class OopStorageTest : public ::testing::Test {
@@ -188,31 +214,22 @@
class OopStorageTestWithAllocation::VM_DeleteBlocksAtSafepoint
: public VM_GTestExecuteAtSafepoint {
public:
- VM_DeleteBlocksAtSafepoint(OopStorage* storage, size_t retain) :
- _storage(storage), _retain(retain)
- {}
+ VM_DeleteBlocksAtSafepoint(OopStorage* storage) : _storage(storage) {}
void doit() {
- _storage->delete_empty_blocks_safepoint(_retain);
+ _storage->delete_empty_blocks_safepoint();
}
private:
OopStorage* _storage;
- size_t _retain;
};
static bool is_allocate_list_sorted(const OopStorage& storage) {
// The allocate_list isn't strictly sorted. Rather, all empty
- // blocks are segregated to the end of the list. And the number of
- // empty blocks should match empty_block_count().
- size_t expected_empty = storage.empty_block_count();
+ // blocks are segregated to the end of the list.
const TestAccess::BlockList& list = TestAccess::allocate_list(storage);
const OopBlock* block = list.ctail();
- for (size_t i = 0; i < expected_empty; ++i, block = list.prev(*block)) {
- if ((block == NULL) || !block->is_empty()) {
- return false;
- }
- }
+ for ( ; (block != NULL) && block->is_empty(); block = list.prev(*block)) {}
for ( ; block != NULL; block = list.prev(*block)) {
if (block->is_empty()) {
return false;
@@ -243,7 +260,7 @@
EXPECT_EQ(1u, _storage.block_count());
EXPECT_EQ(1u, list_length(TestAccess::allocate_list(_storage)));
- EXPECT_EQ(0u, _storage.empty_block_count());
+ EXPECT_EQ(0u, empty_block_count(_storage));
const OopBlock* block = TestAccess::allocate_list(_storage).chead();
EXPECT_NE(block, (OopBlock*)NULL);
@@ -259,7 +276,7 @@
EXPECT_EQ(1u, _storage.block_count());
EXPECT_EQ(1u, list_length(TestAccess::allocate_list(_storage)));
- EXPECT_EQ(1u, _storage.empty_block_count());
+ EXPECT_EQ(1u, empty_block_count(_storage));
const OopBlock* new_block = TestAccess::allocate_list(_storage).chead();
EXPECT_EQ(block, new_block);
@@ -322,14 +339,14 @@
TestAccess::BlockList& active_list = TestAccess::active_list(_storage);
TestAccess::BlockList& allocate_list = TestAccess::allocate_list(_storage);
- EXPECT_EQ(0u, _storage.empty_block_count());
+ EXPECT_EQ(0u, empty_block_count(_storage));
entries[0] = _storage.allocate();
ASSERT_TRUE(entries[0] != NULL);
EXPECT_EQ(1u, list_length(active_list));
EXPECT_EQ(1u, _storage.block_count());
EXPECT_EQ(1u, list_length(allocate_list));
- EXPECT_EQ(0u, _storage.empty_block_count());
+ EXPECT_EQ(0u, empty_block_count(_storage));
const OopBlock* block = active_list.chead();
EXPECT_EQ(1u, TestAccess::block_allocation_count(*block));
@@ -339,7 +356,7 @@
entries[i] = _storage.allocate();
EXPECT_EQ(i + 1, _storage.allocation_count());
ASSERT_TRUE(entries[i] != NULL);
- EXPECT_EQ(0u, _storage.empty_block_count());
+ EXPECT_EQ(0u, empty_block_count(_storage));
if (block == NULL) {
ASSERT_FALSE(is_list_empty(allocate_list));
@@ -374,7 +391,7 @@
EXPECT_EQ(list_length(active_list), list_length(allocate_list));
EXPECT_EQ(list_length(active_list), _storage.block_count());
- EXPECT_EQ(list_length(active_list), _storage.empty_block_count());
+ EXPECT_EQ(list_length(active_list), empty_block_count(_storage));
for (const OopBlock* block = allocate_list.chead();
block != NULL;
block = allocate_list.next(*block)) {
@@ -386,7 +403,7 @@
static const size_t step = 11;
ASSERT_NE(0u, _max_entries % step); // max_entries and step are mutually prime
- EXPECT_EQ(0u, _storage.empty_block_count());
+ EXPECT_EQ(0u, empty_block_count(_storage));
TestAccess::BlockList& active_list = TestAccess::active_list(_storage);
TestAccess::BlockList& allocate_list = TestAccess::allocate_list(_storage);
@@ -409,7 +426,7 @@
EXPECT_EQ(list_length(active_list), list_length(allocate_list));
EXPECT_EQ(list_length(active_list), _storage.block_count());
EXPECT_EQ(0u, total_allocation_count(active_list));
- EXPECT_EQ(list_length(allocate_list), _storage.empty_block_count());
+ EXPECT_EQ(list_length(allocate_list), empty_block_count(_storage));
}
TEST_VM_F(OopStorageTestWithAllocation, random_allocate_release) {
@@ -417,7 +434,7 @@
static const size_t allocate_step = 5;
ASSERT_NE(0u, _max_entries % release_step); // max_entries and step are mutually prime
- EXPECT_EQ(0u, _storage.empty_block_count());
+ EXPECT_EQ(0u, empty_block_count(_storage));
TestAccess::BlockList& active_list = TestAccess::active_list(_storage);
TestAccess::BlockList& allocate_list = TestAccess::allocate_list(_storage);
@@ -449,7 +466,7 @@
EXPECT_EQ(list_length(active_list), list_length(allocate_list));
EXPECT_EQ(list_length(active_list), _storage.block_count());
EXPECT_EQ(0u, total_allocation_count(active_list));
- EXPECT_EQ(list_length(allocate_list), _storage.empty_block_count());
+ EXPECT_EQ(list_length(allocate_list), empty_block_count(_storage));
}
template<bool sorted>
@@ -471,11 +488,12 @@
EXPECT_EQ(_max_entries - nrelease, _storage.allocation_count());
for (size_t i = 0; i < nrelease; ++i) {
- release_entry(_storage, _entries[2 * i + 1]);
+ release_entry(_storage, _entries[2 * i + 1], false);
EXPECT_EQ(_max_entries - nrelease - (i + 1), _storage.allocation_count());
}
+ EXPECT_TRUE(process_deferred_updates(_storage));
- EXPECT_EQ(_storage.block_count(), _storage.empty_block_count());
+ EXPECT_EQ(_storage.block_count(), empty_block_count(_storage));
FREE_C_HEAP_ARRAY(oop*, to_release);
}
@@ -607,8 +625,9 @@
}
while (allocated > 0) {
- release_entry(_storage, entries[--allocated]);
+ release_entry(_storage, entries[--allocated], false);
}
+ process_deferred_updates(_storage);
}
class OopStorageTestIteration : public OopStorageTestWithAllocation {
@@ -627,16 +646,17 @@
memset(_states, 0, sizeof(_states));
size_t initial_release = 0;
- for ( ; _storage.empty_block_count() < 2; ++initial_release) {
+ for ( ; empty_block_count(_storage) < 2; ++initial_release) {
ASSERT_GT(_max_entries, initial_release);
release_entry(_storage, _entries[initial_release]);
_states[0][initial_release] = mark_released;
}
for (size_t i = initial_release; i < _max_entries; i += 3) {
- release_entry(_storage, _entries[i]);
+ release_entry(_storage, _entries[i], false);
_states[0][i] = mark_released;
}
+ process_deferred_updates(_storage);
}
class VerifyState;
@@ -1006,30 +1026,21 @@
EXPECT_EQ(initial_active_size, _storage.block_count());
ASSERT_LE(3u, initial_active_size); // Need at least 3 blocks for test
- for (size_t i = 0; _storage.empty_block_count() < 3; ++i) {
+ for (size_t i = 0; empty_block_count(_storage) < 3; ++i) {
ASSERT_GT(_max_entries, i);
release_entry(_storage, _entries[i]);
}
EXPECT_EQ(initial_active_size, list_length(active_list));
EXPECT_EQ(initial_active_size, _storage.block_count());
- EXPECT_EQ(3u, _storage.empty_block_count());
+ EXPECT_EQ(3u, empty_block_count(_storage));
{
ThreadInVMfromNative invm(JavaThread::current());
- VM_DeleteBlocksAtSafepoint op(&_storage, 2);
+ VM_DeleteBlocksAtSafepoint op(&_storage);
VMThread::execute(&op);
}
- EXPECT_EQ(2u, _storage.empty_block_count());
- EXPECT_EQ(initial_active_size - 1, list_length(active_list));
- EXPECT_EQ(initial_active_size - 1, _storage.block_count());
-
- {
- ThreadInVMfromNative invm(JavaThread::current());
- VM_DeleteBlocksAtSafepoint op(&_storage, 0);
- VMThread::execute(&op);
- }
- EXPECT_EQ(0u, _storage.empty_block_count());
+ EXPECT_EQ(0u, empty_block_count(_storage));
EXPECT_EQ(initial_active_size - 3, list_length(active_list));
EXPECT_EQ(initial_active_size - 3, _storage.block_count());
}
@@ -1041,22 +1052,17 @@
EXPECT_EQ(initial_active_size, _storage.block_count());
ASSERT_LE(3u, initial_active_size); // Need at least 3 blocks for test
- for (size_t i = 0; _storage.empty_block_count() < 3; ++i) {
+ for (size_t i = 0; empty_block_count(_storage) < 3; ++i) {
ASSERT_GT(_max_entries, i);
release_entry(_storage, _entries[i]);
}
EXPECT_EQ(initial_active_size, list_length(active_list));
EXPECT_EQ(initial_active_size, _storage.block_count());
- EXPECT_EQ(3u, _storage.empty_block_count());
+ EXPECT_EQ(3u, empty_block_count(_storage));
- _storage.delete_empty_blocks_concurrent(2);
- EXPECT_EQ(2u, _storage.empty_block_count());
- EXPECT_EQ(initial_active_size - 1, list_length(active_list));
- EXPECT_EQ(initial_active_size - 1, _storage.block_count());
-
- _storage.delete_empty_blocks_concurrent(0);
- EXPECT_EQ(0u, _storage.empty_block_count());
+ _storage.delete_empty_blocks_concurrent();
+ EXPECT_EQ(0u, empty_block_count(_storage));
EXPECT_EQ(initial_active_size - 3, list_length(active_list));
EXPECT_EQ(initial_active_size - 3, _storage.block_count());
}
@@ -1075,13 +1081,14 @@
for (size_t i = 0; i < _max_entries; ++i) {
if ((_entries[i] != retained) && (_entries[i] != released)) {
- release_entry(_storage, _entries[i]);
+ // Leave deferred release updates to block deletion.
+ release_entry(_storage, _entries[i], false);
}
}
{
ThreadInVMfromNative invm(JavaThread::current());
- VM_DeleteBlocksAtSafepoint op(&_storage, 0);
+ VM_DeleteBlocksAtSafepoint op(&_storage);
VMThread::execute(&op);
}
EXPECT_EQ(OopStorage::ALLOCATED_ENTRY, _storage.allocation_status(retained));
@@ -1121,12 +1128,14 @@
TEST_VM_F(OopStorageTestWithAllocation, print_storage) {
// Release the first 1/2
for (size_t i = 0; i < (_max_entries / 2); ++i) {
- release_entry(_storage, _entries[i]);
+ // Deferred updates don't affect print output.
+ release_entry(_storage, _entries[i], false);
_entries[i] = NULL;
}
// Release every other remaining
for (size_t i = _max_entries / 2; i < _max_entries; i += 2) {
- release_entry(_storage, _entries[i]);
+ // Deferred updates don't affect print output.
+ release_entry(_storage, _entries[i], false);
_entries[i] = NULL;
}
@@ -1137,24 +1146,17 @@
size_t expected_blocks = (_max_entries + entries_per_block - 1) / entries_per_block;
EXPECT_EQ(expected_blocks, _storage.block_count());
- size_t expected_empties = (_max_entries / 2) / entries_per_block;
- EXPECT_EQ(expected_empties, _storage.empty_block_count());
-
- size_t expected_used = expected_blocks - expected_empties;
-
- double expected_usage = (100.0 * expected_entries) / (expected_used * entries_per_block);
+ double expected_usage = (100.0 * expected_entries) / (expected_blocks * entries_per_block);
{
ResourceMark rm;
stringStream expected_st;
expected_st.print("Test Storage: " SIZE_FORMAT
" entries in " SIZE_FORMAT
- " blocks (%.F%%), " SIZE_FORMAT
- " empties, " SIZE_FORMAT " bytes",
+ " blocks (%.F%%), " SIZE_FORMAT " bytes",
expected_entries,
- expected_used,
+ expected_blocks,
expected_usage,
- expected_empties,
_storage.total_memory_usage());
stringStream st;
_storage.print_on(&st);