# HG changeset patch # User kbarrett # Date 1525383410 14400 # Node ID 9f758f0bb058a2d251f5bd8909f4007d995b90d1 # Parent 19829b375d08006f054d92f124302fa8637cfa41 8200557: OopStorage parallel iteration scales poorly Summary: Change representation of sequence of all blocks for better scaling. Reviewed-by: coleenp, eosterlund diff -r 19829b375d08 -r 9f758f0bb058 src/hotspot/share/gc/shared/oopStorage.cpp --- a/src/hotspot/share/gc/shared/oopStorage.cpp Thu May 03 14:13:20 2018 -0700 +++ b/src/hotspot/share/gc/shared/oopStorage.cpp Thu May 03 17:36:50 2018 -0400 @@ -28,20 +28,22 @@ #include "logging/log.hpp" #include "logging/logStream.hpp" #include "memory/allocation.inline.hpp" -#include "memory/resourceArea.hpp" #include "runtime/atomic.hpp" +#include "runtime/globals.hpp" #include "runtime/handles.inline.hpp" #include "runtime/mutex.hpp" #include "runtime/mutexLocker.hpp" #include "runtime/orderAccess.inline.hpp" #include "runtime/safepoint.hpp" #include "runtime/stubRoutines.hpp" +#include "runtime/thread.hpp" #include "utilities/align.hpp" #include "utilities/count_trailing_zeros.hpp" #include "utilities/debug.hpp" #include "utilities/globalDefinitions.hpp" #include "utilities/macros.hpp" #include "utilities/ostream.hpp" +#include "utilities/spinYield.hpp" OopStorage::BlockEntry::BlockEntry() : _prev(NULL), _next(NULL) {} @@ -108,6 +110,90 @@ } } +OopStorage::BlockArray::BlockArray(size_t size) : + _size(size), + _block_count(0), + _refcount(0) +{} + +OopStorage::BlockArray::~BlockArray() { + assert(_refcount == 0, "precondition"); +} + +OopStorage::BlockArray* OopStorage::BlockArray::create(size_t size, AllocFailType alloc_fail) { + size_t size_in_bytes = blocks_offset() + sizeof(Block*) * size; + void* mem = NEW_C_HEAP_ARRAY3(char, size_in_bytes, mtGC, CURRENT_PC, alloc_fail); + if (mem == NULL) return NULL; + return new (mem) BlockArray(size); +} + +void OopStorage::BlockArray::destroy(BlockArray* ba) { + ba->~BlockArray(); + FREE_C_HEAP_ARRAY(char, ba); +} + +size_t OopStorage::BlockArray::size() const { + return _size; +} + +size_t OopStorage::BlockArray::block_count() const { + return _block_count; +} + +size_t OopStorage::BlockArray::block_count_acquire() const { + return OrderAccess::load_acquire(&_block_count); +} + +void OopStorage::BlockArray::increment_refcount() const { + int new_value = Atomic::add(1, &_refcount); + assert(new_value >= 1, "negative refcount %d", new_value - 1); +} + +bool OopStorage::BlockArray::decrement_refcount() const { + int new_value = Atomic::sub(1, &_refcount); + assert(new_value >= 0, "negative refcount %d", new_value); + return new_value == 0; +} + +bool OopStorage::BlockArray::push(Block* block) { + size_t index = _block_count; + if (index < _size) { + block->set_active_index(index); + *block_ptr(index) = block; + // Use a release_store to ensure all the setup is complete before + // making the block visible. + OrderAccess::release_store(&_block_count, index + 1); + return true; + } else { + return false; + } +} + +void OopStorage::BlockArray::remove(Block* block) { + assert(_block_count > 0, "array is empty"); + size_t index = block->active_index(); + assert(*block_ptr(index) == block, "block not present"); + size_t last_index = _block_count - 1; + Block* last_block = *block_ptr(last_index); + last_block->set_active_index(index); + *block_ptr(index) = last_block; + _block_count = last_index; +} + +void OopStorage::BlockArray::copy_from(const BlockArray* from) { + assert(_block_count == 0, "array must be empty"); + size_t count = from->_block_count; + assert(count <= _size, "precondition"); + Block* const* from_ptr = from->block_ptr(0); + Block** to_ptr = block_ptr(0); + for (size_t i = 0; i < count; ++i) { + Block* block = *from_ptr++; + assert(block->active_index() == i, "invariant"); + *to_ptr++ = block; + } + _block_count = count; +} + // Blocks start with an array of BitsPerWord oop entries. That array // is divided into conceptual BytesPerWord sections of BitsPerByte // entries. Blocks are allocated aligned on section boundaries, for @@ -125,7 +211,7 @@ _allocated_bitmask(0), _owner(owner), _memory(memory), - _active_entry(), + _active_index(0), _allocate_entry(), _deferred_updates_next(NULL), _release_refcount(0) @@ -146,10 +232,6 @@ const_cast(_owner) = NULL; } -const OopStorage::BlockEntry& OopStorage::Block::get_active_entry(const Block& block) { - return block._active_entry; -} - const OopStorage::BlockEntry& OopStorage::Block::get_allocate_entry(const Block& block) { return block._allocate_entry; } @@ -204,6 +286,20 @@ return (base <= ptr) && (ptr < (base + ARRAY_SIZE(_data))); } +size_t OopStorage::Block::active_index() const { + return _active_index; +} + +void OopStorage::Block::set_active_index(size_t index) { + _active_index = index; +} + +size_t OopStorage::Block::active_index_safe(const Block* block) { + STATIC_ASSERT(sizeof(intptr_t) == sizeof(block->_active_index)); + assert(CanUseSafeFetchN(), "precondition"); + return SafeFetchN((intptr_t*)&block->_active_index, 0); +} + unsigned OopStorage::Block::get_index(const oop* ptr) const { assert(contains(ptr), PTR_FORMAT " not in block " PTR_FORMAT, p2i(ptr), p2i(this)); return static_cast(ptr - get_pointer(0)); @@ -246,7 +342,7 @@ // This can return a false positive if ptr is not contained by some // block. For some uses, it is a precondition that ptr is valid, -// e.g. contained in some block in owner's _active_list. Other uses +// e.g. contained in some block in owner's _active_array. Other uses // require additional validation of the result. OopStorage::Block* OopStorage::Block::block_for_ptr(const OopStorage* owner, const oop* ptr) { @@ -280,12 +376,12 @@ // Allocation involves the _allocate_list, which contains a subset of the // blocks owned by a storage object. This is a doubly-linked list, linked // through dedicated fields in the blocks. Full blocks are removed from this -// list, though they are still present in the _active_list. Empty blocks are +// list, though they are still present in the _active_array. Empty blocks are // kept at the end of the _allocate_list, to make it easy for empty block // deletion to find them. // // allocate(), and delete_empty_blocks_concurrent() lock the -// _allocate_mutex while performing any list modifications. +// _allocate_mutex while performing any list and array modifications. // // allocate() and release() update a block's _allocated_bitmask using CAS // loops. This prevents loss of updates even though release() performs @@ -299,7 +395,7 @@ // // release() is performed lock-free. release() first looks up the block for // the entry, using address alignment to find the enclosing block (thereby -// avoiding iteration over the _active_list). Once the block has been +// avoiding iteration over the _active_array). Once the block has been // determined, its _allocated_bitmask needs to be updated, and its position in // the _allocate_list may need to be updated. There are two cases: // @@ -340,7 +436,7 @@ // Failed to make new block, no other thread made a block // available while the mutex was released, and didn't get // one from a deferred update either, so return failure. - log_info(oopstorage, ref)("%s: failed allocation", name()); + log_info(oopstorage, ref)("%s: failed block allocation", name()); return NULL; } } @@ -348,17 +444,21 @@ // Add new block to storage. log_info(oopstorage, blocks)("%s: new block " PTR_FORMAT, name(), p2i(block)); + // Add new block to the _active_array, growing if needed. + if (!_active_array->push(block)) { + if (expand_active_array()) { + guarantee(_active_array->push(block), "push failed after expansion"); + } else { + log_info(oopstorage, blocks)("%s: failed active array expand", name()); + Block::delete_block(*block); + return NULL; + } + } // Add to end of _allocate_list. The mutex release allowed // other threads to add blocks to the _allocate_list. We prefer // to allocate from non-empty blocks, to allow empty blocks to // be deleted. _allocate_list.push_back(*block); - // Add to front of _active_list, and then record as the head - // block, for concurrent iteration protocol. - _active_list.push_front(*block); - ++_block_count; - // Ensure all setup of block is complete before making it visible. - OrderAccess::release_store(&_active_head, block); } block = _allocate_list.head(); } @@ -383,6 +483,123 @@ return result; } +// Create a new, larger, active array with the same content as the +// current array, and then replace, relinquishing the old array. +// Return true if the array was successfully expanded, false to +// indicate allocation failure. +bool OopStorage::expand_active_array() { + assert_lock_strong(_allocate_mutex); + BlockArray* old_array = _active_array; + size_t new_size = 2 * old_array->size(); + log_info(oopstorage, blocks)("%s: expand active array " SIZE_FORMAT, + name(), new_size); + BlockArray* new_array = BlockArray::create(new_size, AllocFailStrategy::RETURN_NULL); + if (new_array == NULL) return false; + new_array->copy_from(old_array); + replace_active_array(new_array); + relinquish_block_array(old_array); + return true; +} + +OopStorage::ProtectActive::ProtectActive() : _enter(0), _exit() {} + +// Begin read-side critical section. +uint OopStorage::ProtectActive::read_enter() { + return Atomic::add(2u, &_enter); +} + +// End read-side critical section. +void OopStorage::ProtectActive::read_exit(uint enter_value) { + Atomic::add(2u, &_exit[enter_value & 1]); +} + +// Wait until all readers that entered the critical section before +// synchronization have exited that critical section. +void OopStorage::ProtectActive::write_synchronize() { + SpinYield spinner; + // Determine old and new exit counters, based on bit0 of the + // on-entry _enter counter. + uint value = OrderAccess::load_acquire(&_enter); + volatile uint* new_ptr = &_exit[(value + 1) & 1]; + // Atomically change the in-use exit counter to the new counter, by + // adding 1 to the _enter counter (flipping bit0 between 0 and 1) + // and initializing the new exit counter to that enter value. Note: + // The new exit counter is not being used by read operations until + // this change succeeds. + uint old; + do { + old = value; + *new_ptr = ++value; + value = Atomic::cmpxchg(value, &_enter, old); + } while (old != value); + // Readers that entered the critical section before we changed the + // selected exit counter will use the old exit counter. Readers + // entering after the change will use the new exit counter. Wait + // for all the critical sections started before the change to + // complete, e.g. for the value of old_ptr to catch up with old. + volatile uint* old_ptr = &_exit[old & 1]; + while (old != OrderAccess::load_acquire(old_ptr)) { + spinner.wait(); + } +} + +// Make new_array the _active_array. Increments new_array's refcount +// to account for the new reference. The assignment is atomic wrto +// obtain_active_array; once this function returns, it is safe for the +// caller to relinquish the old array. +void OopStorage::replace_active_array(BlockArray* new_array) { + // Caller has the old array that is the current value of _active_array. + // Update new_array refcount to account for the new reference. + new_array->increment_refcount(); + // Install new_array, ensuring its initialization is complete first. + OrderAccess::release_store(&_active_array, new_array); + // Wait for any readers that could read the old array from _active_array. + _protect_active.write_synchronize(); + // All obtain critical sections that could see the old array have + // completed, having incremented the refcount of the old array. The + // caller can now safely relinquish the old array. +} + +// Atomically (wrto replace_active_array) get the active array and +// increment its refcount. This provides safe access to the array, +// even if an allocate operation expands and replaces the value of +// _active_array. The caller must relinquish the array when done +// using it. +OopStorage::BlockArray* OopStorage::obtain_active_array() const { + uint enter_value = _protect_active.read_enter(); + BlockArray* result = OrderAccess::load_acquire(&_active_array); + result->increment_refcount(); + _protect_active.read_exit(enter_value); + return result; +} + +// Decrement refcount of array and destroy if refcount is zero. +void OopStorage::relinquish_block_array(BlockArray* array) const { + if (array->decrement_refcount()) { + assert(array != _active_array, "invariant"); + BlockArray::destroy(array); + } +} + +class OopStorage::WithActiveArray : public StackObj { + const OopStorage* _storage; + BlockArray* _active_array; + +public: + WithActiveArray(const OopStorage* storage) : + _storage(storage), + _active_array(storage->obtain_active_array()) + {} + + ~WithActiveArray() { + _storage->relinquish_block_array(_active_array); + } + + BlockArray& active_array() const { + return *_active_array; + } +}; + OopStorage::Block* OopStorage::find_block_or_null(const oop* ptr) const { assert(ptr != NULL, "precondition"); return Block::block_for_ptr(this, ptr); @@ -392,7 +609,6 @@ uintx old_allocated, const OopStorage* owner, const void* block) { - ResourceMark rm; Log(oopstorage, blocks) log; LogStream ls(log.debug()); if (is_full_bitmask(old_allocated)) { @@ -546,20 +762,21 @@ return dup; } +const size_t initial_active_array_size = 8; + OopStorage::OopStorage(const char* name, Mutex* allocate_mutex, Mutex* active_mutex) : _name(dup_name(name)), - _active_list(&Block::get_active_entry), + _active_array(BlockArray::create(initial_active_array_size)), _allocate_list(&Block::get_allocate_entry), - _active_head(NULL), _deferred_updates(NULL), _allocate_mutex(allocate_mutex), _active_mutex(active_mutex), _allocation_count(0), - _block_count(0), _concurrent_iteration_active(false) { + _active_array->increment_refcount(); assert(_active_mutex->rank() < _allocate_mutex->rank(), "%s: active_mutex must have lower rank than allocate_mutex", _name); assert(_active_mutex->_safepoint_check_required != Mutex::_safepoint_check_always, @@ -583,10 +800,13 @@ while ((block = _allocate_list.head()) != NULL) { _allocate_list.unlink(*block); } - while ((block = _active_list.head()) != NULL) { - _active_list.unlink(*block); + bool unreferenced = _active_array->decrement_refcount(); + assert(unreferenced, "deleting storage while _active_array is referenced"); + for (size_t i = _active_array->block_count(); 0 < i; ) { + block = _active_array->at(--i); Block::delete_block(*block); } + BlockArray::destroy(_active_array); FREE_C_HEAP_ARRAY(char, _name); } @@ -598,16 +818,13 @@ // Don't interfere with a concurrent iteration. if (_concurrent_iteration_active) return; // Delete empty (and otherwise deletable) blocks from end of _allocate_list. - for (const Block* block = _allocate_list.ctail(); + for (Block* block = _allocate_list.tail(); (block != NULL) && block->is_deletable(); - block = _allocate_list.ctail()) { - _active_list.unlink(*block); + block = _allocate_list.tail()) { + _active_array->remove(block); _allocate_list.unlink(*block); delete_empty_block(*block); - --_block_count; } - // Update _active_head, in case current value was in deleted set. - _active_head = _active_list.head(); } void OopStorage::delete_empty_blocks_concurrent() { @@ -616,14 +833,14 @@ // release the mutex across the block deletions. Set an upper bound // on how many blocks we'll try to release, so other threads can't // cause an unbounded stay in this function. - size_t limit = _block_count; + size_t limit = block_count(); for (size_t i = 0; i < limit; ++i) { // Additional updates might become available while we dropped the // lock. But limit number processed to limit lock duration. reduce_deferred_updates(); - const Block* block = _allocate_list.ctail(); + Block* block = _allocate_list.tail(); if ((block == NULL) || !block->is_deletable()) { // No block to delete, so done. There could be more pending // deferred updates that could give us more work to do; deal with @@ -635,12 +852,7 @@ MutexLockerEx aml(_active_mutex, Mutex::_no_safepoint_check_flag); // Don't interfere with a concurrent iteration. if (_concurrent_iteration_active) return; - // Remove block from _active_list, updating head if needed. - _active_list.unlink(*block); - --_block_count; - if (block == _active_head) { - _active_head = _active_list.head(); - } + _active_array->remove(block); } // Remove block from _allocate_list and delete it. _allocate_list.unlink(*block); @@ -653,18 +865,17 @@ OopStorage::EntryStatus OopStorage::allocation_status(const oop* ptr) const { const Block* block = find_block_or_null(ptr); if (block != NULL) { - // Verify block is a real block. For now, simple linear search. - // Do something more clever if this is a performance bottleneck. + // Prevent block deletion and _active_array modification. MutexLockerEx ml(_allocate_mutex, Mutex::_no_safepoint_check_flag); - for (const Block* check_block = _active_list.chead(); - check_block != NULL; - check_block = _active_list.next(*check_block)) { - if (check_block == block) { - if ((block->allocated_bitmask() & block->bitmask_for_entry(ptr)) != 0) { - return ALLOCATED_ENTRY; - } else { - return UNALLOCATED_ENTRY; - } + // Block could be a false positive, so get index carefully. + size_t index = Block::active_index_safe(block); + if ((index < _active_array->block_count()) && + (block == _active_array->at(index)) && + block->contains(ptr)) { + if ((block->allocated_bitmask() & block->bitmask_for_entry(ptr)) != 0) { + return ALLOCATED_ENTRY; + } else { + return UNALLOCATED_ENTRY; } } } @@ -676,30 +887,50 @@ } size_t OopStorage::block_count() const { - return _block_count; + WithActiveArray wab(this); + // Count access is racy, but don't care. + return wab.active_array().block_count(); } size_t OopStorage::total_memory_usage() const { size_t total_size = sizeof(OopStorage); total_size += strlen(name()) + 1; - total_size += block_count() * Block::allocation_size(); + total_size += sizeof(BlockArray); + WithActiveArray wab(this); + const BlockArray& blocks = wab.active_array(); + // Count access is racy, but don't care. + total_size += blocks.block_count() * Block::allocation_size(); + total_size += blocks.size() * sizeof(Block*); return total_size; } // Parallel iteration support -static char* not_started_marker_dummy = NULL; -static void* const not_started_marker = ¬_started_marker_dummy; +uint OopStorage::BasicParState::default_estimated_thread_count(bool concurrent) { + return concurrent ? ConcGCThreads : ParallelGCThreads; +} -OopStorage::BasicParState::BasicParState(OopStorage* storage, bool concurrent) : +OopStorage::BasicParState::BasicParState(const OopStorage* storage, + uint estimated_thread_count, + bool concurrent) : _storage(storage), - _next_block(not_started_marker), + _active_array(_storage->obtain_active_array()), + _block_count(0), // initialized properly below + _next_block(0), + _estimated_thread_count(estimated_thread_count), _concurrent(concurrent) { + assert(estimated_thread_count > 0, "estimated thread count must be positive"); update_iteration_state(true); + // Get the block count *after* iteration state updated, so concurrent + // empty block deletion is suppressed and can't reduce the count. But + // ensure the count we use was written after the block with that count + // was fully initialized; see BlockArray::push. + _block_count = _active_array->block_count_acquire(); } OopStorage::BasicParState::~BasicParState() { + _storage->relinquish_block_array(_active_array); update_iteration_state(false); } @@ -711,29 +942,49 @@ } } -void OopStorage::BasicParState::ensure_iteration_started() { - if (!_concurrent) { - assert_at_safepoint(); +bool OopStorage::BasicParState::claim_next_segment(IterationData* data) { + data->_processed += data->_segment_end - data->_segment_start; + size_t start = OrderAccess::load_acquire(&_next_block); + if (start >= _block_count) { + return finish_iteration(data); // No more blocks available. } - assert(!_concurrent || _storage->_concurrent_iteration_active, "invariant"); - // Ensure _next_block is not the not_started_marker, setting it to - // the _active_head to start the iteration if necessary. - if (OrderAccess::load_acquire(&_next_block) == not_started_marker) { - Atomic::cmpxchg(_storage->_active_head, &_next_block, not_started_marker); + // Try to claim several at a time, but not *too* many. We want to + // avoid deciding there are many available and selecting a large + // quantity, get delayed, and then end up claiming most or all of + // the remaining largish amount of work, leaving nothing for other + // threads to do. But too small a step can lead to contention + // over _next_block, esp. when the work per block is small. + size_t max_step = 10; + size_t remaining = _block_count - start; + size_t step = MIN2(max_step, 1 + (remaining / _estimated_thread_count)); + // Atomic::add with possible overshoot. This can perform better + // than a CAS loop on some platforms when there is contention. + // We can cope with the uncertainty by recomputing start/end from + // the result of the add, and dealing with potential overshoot. + size_t end = Atomic::add(step, &_next_block); + // _next_block may have changed, so recompute start from result of add. + start = end - step; + // _next_block may have changed so much that end has overshot. + end = MIN2(end, _block_count); + // _next_block may have changed so much that even start has overshot. + if (start < _block_count) { + // Record claimed segment for iteration. + data->_segment_start = start; + data->_segment_end = end; + return true; // Success. + } else { + // No more blocks to claim. + return finish_iteration(data); } - assert(_next_block != not_started_marker, "postcondition"); } -OopStorage::Block* OopStorage::BasicParState::claim_next_block() { - assert(_next_block != not_started_marker, "Iteration not started"); - void* next = _next_block; - while (next != NULL) { - void* new_next = _storage->_active_list.next(*static_cast(next)); - void* fetched = Atomic::cmpxchg(new_next, &_next_block, next); - if (fetched == next) break; // Claimed. - next = fetched; - } - return static_cast(next); +bool OopStorage::BasicParState::finish_iteration(const IterationData* data) const { + log_debug(oopstorage, blocks, stats) + ("Parallel iteration on %s: blocks = " SIZE_FORMAT + ", processed = " SIZE_FORMAT " (%2.f%%)", + _storage->name(), _block_count, data->_processed, + percent_of(data->_processed, _block_count)); + return false; } const char* OopStorage::name() const { return _name; } @@ -742,7 +993,7 @@ void OopStorage::print_on(outputStream* st) const { size_t allocations = _allocation_count; - size_t blocks = _block_count; + size_t blocks = _active_array->block_count(); double data_size = section_size * section_count; double alloc_percentage = percent_of((double)allocations, blocks * data_size); diff -r 19829b375d08 -r 9f758f0bb058 src/hotspot/share/gc/shared/oopStorage.hpp --- a/src/hotspot/share/gc/shared/oopStorage.hpp Thu May 03 14:13:20 2018 -0700 +++ b/src/hotspot/share/gc/shared/oopStorage.hpp Thu May 03 17:36:50 2018 -0400 @@ -170,27 +170,11 @@ // classes. C++03 introduced access for nested classes with DR45, but xlC // version 12 rejects it. NOT_AIX( private: ) - class Block; // Forward decl; defined in .inline.hpp file. - class BlockList; // Forward decl for BlockEntry friend decl. - - class BlockEntry { - friend class BlockList; + class Block; // Fixed-size array of oops, plus bookkeeping. + class BlockArray; // Array of Blocks, plus bookkeeping. + class BlockEntry; // Provides BlockList links in a Block. - // Members are mutable, and we deal exclusively with pointers to - // const, to make const blocks easier to use; a block being const - // doesn't prevent modifying its list state. - mutable const Block* _prev; - mutable const Block* _next; - - // Noncopyable. - BlockEntry(const BlockEntry&); - BlockEntry& operator=(const BlockEntry&); - - public: - BlockEntry(); - ~BlockEntry(); - }; - + // Doubly-linked list of Blocks. class BlockList { const Block* _head; const Block* _tail; @@ -205,6 +189,7 @@ ~BlockList(); Block* head(); + Block* tail(); const Block* chead() const; const Block* ctail() const; @@ -219,19 +204,34 @@ void unlink(const Block& block); }; + // RCU-inspired protection of access to _active_array. + class ProtectActive { + volatile uint _enter; + volatile uint _exit[2]; + + public: + ProtectActive(); + + uint read_enter(); + void read_exit(uint enter_value); + void write_synchronize(); + }; + private: const char* _name; - BlockList _active_list; + BlockArray* _active_array; BlockList _allocate_list; - Block* volatile _active_head; Block* volatile _deferred_updates; Mutex* _allocate_mutex; Mutex* _active_mutex; - // Counts are volatile for racy unlocked accesses. + // Volatile for racy unlocked accesses. volatile size_t _allocation_count; - volatile size_t _block_count; + + // Protection for _active_array. + mutable ProtectActive _protect_active; + // mutable because this gets set even for const iteration. mutable bool _concurrent_iteration_active; @@ -239,6 +239,13 @@ void delete_empty_block(const Block& block); bool reduce_deferred_updates(); + // Managing _active_array. + bool expand_active_array(); + void replace_active_array(BlockArray* new_array); + BlockArray* obtain_active_array() const; + void relinquish_block_array(BlockArray* array) const; + class WithActiveArray; // RAII helper for active array access. + template static bool iterate_impl(F f, Storage* storage); diff -r 19829b375d08 -r 9f758f0bb058 src/hotspot/share/gc/shared/oopStorage.inline.hpp --- a/src/hotspot/share/gc/shared/oopStorage.inline.hpp Thu May 03 14:13:20 2018 -0700 +++ b/src/hotspot/share/gc/shared/oopStorage.inline.hpp Thu May 03 17:36:50 2018 -0400 @@ -30,10 +30,107 @@ #include "metaprogramming/isConst.hpp" #include "oops/oop.hpp" #include "runtime/safepoint.hpp" +#include "utilities/align.hpp" #include "utilities/count_trailing_zeros.hpp" #include "utilities/debug.hpp" #include "utilities/globalDefinitions.hpp" +// Array of all active blocks. Refcounted for lock-free reclaim of +// old array when a new array is allocated for expansion. +class OopStorage::BlockArray { + friend class OopStorage::TestAccess; + + size_t _size; + volatile size_t _block_count; + mutable volatile int _refcount; + // Block* _blocks[1]; // Pseudo flexible array member. + + BlockArray(size_t size); + ~BlockArray(); + + // Noncopyable + BlockArray(const BlockArray&); + BlockArray& operator=(const BlockArray&); + + static size_t blocks_offset(); + Block* const* base_ptr() const; + + Block* const* block_ptr(size_t index) const; + Block** block_ptr(size_t index); + +public: + static BlockArray* create(size_t size, AllocFailType alloc_fail = AllocFailStrategy::EXIT_OOM); + static void destroy(BlockArray* ba); + + inline Block* at(size_t i) const; + + size_t size() const; + size_t block_count() const; + size_t block_count_acquire() const; + void increment_refcount() const; + bool decrement_refcount() const; // Return true if zero, otherwise false + + // Support for OopStorage::allocate. + // Add block to the end of the array. Updates block count at the + // end of the operation, with a release_store. Returns true if the + // block was added, false if there was no room available. + // precondition: owner's _allocation_mutex is locked, or at safepoint. + bool push(Block* block); + + // Support OopStorage::delete_empty_blocks_xxx operations. + // Remove block from the array. + // precondition: block must be present at its active_index element. + void remove(Block* block); + + void copy_from(const BlockArray* from); +}; + +inline size_t OopStorage::BlockArray::blocks_offset() { + return align_up(sizeof(BlockArray), sizeof(Block*)); +} + +inline OopStorage::Block* const* OopStorage::BlockArray::base_ptr() const { + const void* ptr = reinterpret_cast(this) + blocks_offset(); + return reinterpret_cast(ptr); +} + +inline OopStorage::Block* const* OopStorage::BlockArray::block_ptr(size_t index) const { + return base_ptr() + index; +} + +inline OopStorage::Block** OopStorage::BlockArray::block_ptr(size_t index) { + return const_cast(base_ptr() + index); +} + +inline OopStorage::Block* OopStorage::BlockArray::at(size_t index) const { + assert(index < _block_count, "precondition"); + return *block_ptr(index); +} + +// A Block has an embedded BlockEntry to provide the links between +// Blocks in a BlockList. +class OopStorage::BlockEntry { + friend class OopStorage::BlockList; + + // Members are mutable, and we deal exclusively with pointers to + // const, to make const blocks easier to use; a block being const + // doesn't prevent modifying its list state. + mutable const Block* _prev; + mutable const Block* _next; + + // Noncopyable. + BlockEntry(const BlockEntry&); + BlockEntry& operator=(const BlockEntry&); + +public: + BlockEntry(); + ~BlockEntry(); +}; + +// Fixed-sized array of oops, plus bookkeeping data. +// All blocks are in the storage's _active_array, at the block's _active_index. +// Non-full blocks are in the storage's _allocate_list, linked through the +// block's _allocate_entry. Empty blocks are at the end of that list. class OopStorage::Block /* No base class, to avoid messing up alignment. */ { // _data must be the first non-static data member, for alignment. oop _data[BitsPerWord]; @@ -42,7 +139,7 @@ volatile uintx _allocated_bitmask; // One bit per _data element. const OopStorage* _owner; void* _memory; // Unaligned storage containing block. - BlockEntry _active_entry; + size_t _active_index; BlockEntry _allocate_entry; Block* volatile _deferred_updates_next; volatile uintx _release_refcount; @@ -61,7 +158,6 @@ Block& operator=(const Block&); public: - static const BlockEntry& get_active_entry(const Block& block); static const BlockEntry& get_allocate_entry(const Block& block); static size_t allocation_size(); @@ -84,6 +180,10 @@ bool contains(const oop* ptr) const; + size_t active_index() const; + void set_active_index(size_t index); + static size_t active_index_safe(const Block* block); // Returns 0 if access fails. + // Returns NULL if ptr is not in a block or not allocated in that block. static Block* block_for_ptr(const OopStorage* owner, const oop* ptr); @@ -101,6 +201,10 @@ return const_cast(_head); } +inline OopStorage::Block* OopStorage::BlockList::tail() { + return const_cast(_tail); +} + inline const OopStorage::Block* OopStorage::BlockList::chead() const { return _head; } @@ -253,9 +357,10 @@ // Propagate const/non-const iteration to the block layer, by using // const or non-const blocks as corresponding to Storage. typedef typename Conditional::value, const Block*, Block*>::type BlockPtr; - for (BlockPtr block = storage->_active_head; - block != NULL; - block = storage->_active_list.next(*block)) { + BlockArray* blocks = storage->_active_array; + size_t limit = blocks->block_count(); + for (size_t i = 0; i < limit; ++i) { + BlockPtr block = blocks->at(i); if (!block->iterate(f)) { return false; } diff -r 19829b375d08 -r 9f758f0bb058 src/hotspot/share/gc/shared/oopStorageParState.hpp --- a/src/hotspot/share/gc/shared/oopStorageParState.hpp Thu May 03 14:13:20 2018 -0700 +++ b/src/hotspot/share/gc/shared/oopStorageParState.hpp Thu May 03 17:36:50 2018 -0400 @@ -36,9 +36,8 @@ // // Concurrent Iteration // -// Iteration involves the _active_list, which contains all of the blocks owned -// by a storage object. This is a doubly-linked list, linked through -// dedicated fields in the blocks. +// Iteration involves the _active_array (a BlockArray), which contains all of +// the blocks owned by a storage object. // // At most one concurrent ParState can exist at a time for a given storage // object. @@ -48,27 +47,29 @@ // sets it false when the state is destroyed. These assignments are made with // _active_mutex locked. Meanwhile, empty block deletion is not done while // _concurrent_iteration_active is true. The flag check and the dependent -// removal of a block from the _active_list is performed with _active_mutex +// removal of a block from the _active_array is performed with _active_mutex // locked. This prevents concurrent iteration and empty block deletion from // interfering with with each other. // // Both allocate() and delete_empty_blocks_concurrent() lock the -// _allocate_mutex while performing their respective list manipulations, -// preventing them from interfering with each other. +// _allocate_mutex while performing their respective list and array +// manipulations, preventing them from interfering with each other. // -// When allocate() creates a new block, it is added to the front of the -// _active_list. Then _active_head is set to the new block. When concurrent -// iteration is started (by a parallel worker thread calling the state's -// iterate() function), the current _active_head is used as the initial block -// for the iteration, with iteration proceeding down the list headed by that -// block. +// When allocate() creates a new block, it is added to the end of the +// _active_array. Then _active_array's _block_count is incremented to account +// for the new block. When concurrent iteration is started (by a parallel +// worker thread calling the state's iterate() function), the current +// _active_array and its _block_count are captured for use by the iteration, +// with iteration processing all blocks in that array up to that block count. // -// As a result, the list over which concurrent iteration operates is stable. -// However, once the iteration is started, later allocations may add blocks to -// the front of the list that won't be examined by the iteration. And while -// the list is stable, concurrent allocate() and release() operations may -// change the set of allocated entries in a block at any time during the -// iteration. +// As a result, the sequence over which concurrent iteration operates is +// stable. However, once the iteration is started, later allocations may add +// blocks to the end of the array that won't be examined by the iteration. +// An allocation may even require expansion of the array, so the iteration is +// no longer processing the current array, but rather the previous one. +// And while the sequence is stable, concurrent allocate() and release() +// operations may change the set of allocated entries in a block at any time +// during the iteration. // // As a result, a concurrent iteration handler must accept that some // allocations and releases that occur after the iteration started will not be @@ -138,36 +139,49 @@ // invoked on p. class OopStorage::BasicParState { - OopStorage* _storage; - void* volatile _next_block; + const OopStorage* _storage; + BlockArray* _active_array; + size_t _block_count; + volatile size_t _next_block; + uint _estimated_thread_count; bool _concurrent; // Noncopyable. BasicParState(const BasicParState&); BasicParState& operator=(const BasicParState&); + struct IterationData; + void update_iteration_state(bool value); - void ensure_iteration_started(); - Block* claim_next_block(); + bool claim_next_segment(IterationData* data); + bool finish_iteration(const IterationData* data) const; // Wrapper for iteration handler; ignore handler result and return true. template class AlwaysTrueFn; public: - BasicParState(OopStorage* storage, bool concurrent); + BasicParState(const OopStorage* storage, + uint estimated_thread_count, + bool concurrent); ~BasicParState(); template void iterate(F f); + + static uint default_estimated_thread_count(bool concurrent); }; template class OopStorage::ParState { BasicParState _basic_state; + typedef typename Conditional::type StoragePtr; + public: - ParState(const OopStorage* storage) : - // For simplicity, always recorded as non-const. - _basic_state(const_cast(storage), concurrent) + ParState(StoragePtr storage, + uint estimated_thread_count = BasicParState::default_estimated_thread_count(concurrent)) : + _basic_state(storage, estimated_thread_count, concurrent) {} template void iterate(F f); @@ -179,8 +193,9 @@ BasicParState _basic_state; public: - ParState(OopStorage* storage) : - _basic_state(storage, false) + ParState(OopStorage* storage, + uint estimated_thread_count = BasicParState::default_estimated_thread_count(false)) : + _basic_state(storage, estimated_thread_count, false) {} template void iterate(F f); diff -r 19829b375d08 -r 9f758f0bb058 src/hotspot/share/gc/shared/oopStorageParState.inline.hpp --- a/src/hotspot/share/gc/shared/oopStorageParState.inline.hpp Thu May 03 14:13:20 2018 -0700 +++ b/src/hotspot/share/gc/shared/oopStorageParState.inline.hpp Thu May 03 17:36:50 2018 -0400 @@ -41,14 +41,26 @@ bool operator()(OopPtr ptr) const { _f(ptr); return true; } }; +struct OopStorage::BasicParState::IterationData { + size_t _segment_start; + size_t _segment_end; + size_t _processed; +}; + template inline void OopStorage::BasicParState::iterate(F f) { // Wrap f in ATF so we can use Block::iterate. AlwaysTrueFn atf_f(f); - ensure_iteration_started(); - typename Conditional::type block; - while ((block = claim_next_block()) != NULL) { - block->iterate(atf_f); + IterationData data = {}; // zero initialize. + while (claim_next_segment(&data)) { + assert(data._segment_start < data._segment_end, "invariant"); + assert(data._segment_end <= _block_count, "invariant"); + typedef typename Conditional::type BlockPtr; + size_t i = data._segment_start; + do { + BlockPtr block = _active_array->at(i); + block->iterate(atf_f); + } while (++i < data._segment_end); } } diff -r 19829b375d08 -r 9f758f0bb058 test/hotspot/gtest/gc/shared/test_oopStorage.cpp --- a/test/hotspot/gtest/gc/shared/test_oopStorage.cpp Thu May 03 14:13:20 2018 -0700 +++ b/test/hotspot/gtest/gc/shared/test_oopStorage.cpp Thu May 03 17:36:50 2018 -0400 @@ -53,9 +53,10 @@ public: typedef OopStorage::Block Block; typedef OopStorage::BlockList BlockList; + typedef OopStorage::BlockArray BlockArray; - static BlockList& active_list(OopStorage& storage) { - return storage._active_list; + static BlockArray& active_array(const OopStorage& storage) { + return *storage._active_array; } static BlockList& allocate_list(OopStorage& storage) { @@ -96,20 +97,25 @@ static size_t memory_per_block() { return Block::allocation_size(); } + + static void block_array_set_block_count(BlockArray* blocks, size_t count) { + blocks->_block_count = count; + } }; typedef OopStorage::TestAccess TestAccess; -// --- FIXME: Should be just Block, but that collides with opto Block -// when building with precompiled headers. There really should be -// an opto namespace. + +// The "Oop" prefix is to avoid collision with similar opto names when +// building with precompiled headers, or for consistency with that +// workaround. There really should be an opto namespace. typedef TestAccess::Block OopBlock; -// --- FIXME: Similarly, this typedef collides with opto BlockList. -// typedef TestAccess::BlockList BlockList; +typedef TestAccess::BlockList OopBlockList; +typedef TestAccess::BlockArray OopBlockArray; // Using EXPECT_EQ can't use NULL directly. Otherwise AIX build breaks. const OopBlock* const NULL_BLOCK = NULL; -static size_t list_length(const TestAccess::BlockList& list) { +static size_t list_length(const OopBlockList& list) { size_t result = 0; for (const OopBlock* block = list.chead(); block != NULL; @@ -119,7 +125,7 @@ return result; } -static void clear_list(TestAccess::BlockList& list) { +static void clear_list(OopBlockList& list) { OopBlock* next; for (OopBlock* block = list.head(); block != NULL; block = next) { next = list.next(*block); @@ -127,7 +133,7 @@ } } -static bool is_list_empty(const TestAccess::BlockList& list) { +static bool is_list_empty(const OopBlockList& list) { return list.chead() == NULL; } @@ -149,7 +155,7 @@ } static size_t empty_block_count(const OopStorage& storage) { - const TestAccess::BlockList& list = TestAccess::allocate_list(storage); + const OopBlockList& list = TestAccess::allocate_list(storage); size_t count = 0; for (const OopBlock* block = list.ctail(); (block != NULL) && block->is_empty(); @@ -158,6 +164,20 @@ return count; } +static size_t active_count(const OopStorage& storage) { + return TestAccess::active_array(storage).block_count(); +} + +static OopBlock* active_head(const OopStorage& storage) { + OopBlockArray& ba = TestAccess::active_array(storage); + size_t count = ba.block_count(); + if (count == 0) { + return NULL; + } else { + return ba.at(count - 1); + } +} + class OopStorageTest : public ::testing::Test { public: OopStorageTest(); @@ -188,7 +208,6 @@ OopStorageTest::~OopStorageTest() { clear_list(TestAccess::allocate_list(_storage)); - clear_list(TestAccess::active_list(_storage)); } class OopStorageTestWithAllocation : public OopStorageTest { @@ -227,7 +246,7 @@ static bool is_allocate_list_sorted(const OopStorage& storage) { // The allocate_list isn't strictly sorted. Rather, all empty // blocks are segregated to the end of the list. - const TestAccess::BlockList& list = TestAccess::allocate_list(storage); + const OopBlockList& list = TestAccess::allocate_list(storage); const OopBlock* block = list.ctail(); for ( ; (block != NULL) && block->is_empty(); block = list.prev(*block)) {} for ( ; block != NULL; block = list.prev(*block)) { @@ -238,25 +257,25 @@ return true; } -static size_t total_allocation_count(const TestAccess::BlockList& list) { +static size_t total_allocation_count(const OopStorage& storage) { size_t total_count = 0; - for (const OopBlock* block = list.chead(); - block != NULL; - block = list.next(*block)) { - total_count += TestAccess::block_allocation_count(*block); + const OopBlockArray& ba = TestAccess::active_array(storage); + size_t limit = active_count(storage); + for (size_t i = 0; i < limit; ++i) { + total_count += TestAccess::block_allocation_count(*ba.at(i)); } return total_count; } TEST_VM_F(OopStorageTest, allocate_one) { - EXPECT_TRUE(is_list_empty(TestAccess::active_list(_storage))); + EXPECT_EQ(0u, active_count(_storage)); EXPECT_TRUE(is_list_empty(TestAccess::allocate_list(_storage))); oop* ptr = _storage.allocate(); EXPECT_TRUE(ptr != NULL); EXPECT_EQ(1u, _storage.allocation_count()); - EXPECT_EQ(1u, list_length(TestAccess::active_list(_storage))); + EXPECT_EQ(1u, active_count(_storage)); EXPECT_EQ(1u, _storage.block_count()); EXPECT_EQ(1u, list_length(TestAccess::allocate_list(_storage))); @@ -264,7 +283,7 @@ const OopBlock* block = TestAccess::allocate_list(_storage).chead(); EXPECT_NE(block, (OopBlock*)NULL); - EXPECT_EQ(block, (TestAccess::active_list(_storage).chead())); + EXPECT_EQ(block, active_head(_storage)); EXPECT_FALSE(TestAccess::block_is_empty(*block)); EXPECT_FALSE(TestAccess::block_is_full(*block)); EXPECT_EQ(1u, TestAccess::block_allocation_count(*block)); @@ -272,7 +291,7 @@ release_entry(_storage, ptr); EXPECT_EQ(0u, _storage.allocation_count()); - EXPECT_EQ(1u, list_length(TestAccess::active_list(_storage))); + EXPECT_EQ(1u, active_count(_storage)); EXPECT_EQ(1u, _storage.block_count()); EXPECT_EQ(1u, list_length(TestAccess::allocate_list(_storage))); @@ -280,7 +299,7 @@ const OopBlock* new_block = TestAccess::allocate_list(_storage).chead(); EXPECT_EQ(block, new_block); - EXPECT_EQ(block, (TestAccess::active_list(_storage).chead())); + EXPECT_EQ(block, active_head(_storage)); EXPECT_TRUE(TestAccess::block_is_empty(*block)); EXPECT_FALSE(TestAccess::block_is_full(*block)); EXPECT_EQ(0u, TestAccess::block_allocation_count(*block)); @@ -290,20 +309,19 @@ static const size_t max_entries = 1000; oop* entries[max_entries]; - TestAccess::BlockList& active_list = TestAccess::active_list(_storage); - TestAccess::BlockList& allocate_list = TestAccess::allocate_list(_storage); + OopBlockList& allocate_list = TestAccess::allocate_list(_storage); - EXPECT_TRUE(is_list_empty(active_list)); + EXPECT_EQ(0u, active_count(_storage)); EXPECT_EQ(0u, _storage.block_count()); EXPECT_TRUE(is_list_empty(allocate_list)); size_t allocated = 0; for ( ; allocated < max_entries; ++allocated) { EXPECT_EQ(allocated, _storage.allocation_count()); - if (!is_list_empty(active_list)) { - EXPECT_EQ(1u, list_length(active_list)); + if (active_count(_storage) != 0) { + EXPECT_EQ(1u, active_count(_storage)); EXPECT_EQ(1u, _storage.block_count()); - const OopBlock& block = *active_list.chead(); + const OopBlock& block = *TestAccess::active_array(_storage).at(0); EXPECT_EQ(allocated, TestAccess::block_allocation_count(block)); if (TestAccess::block_is_full(block)) { break; @@ -316,10 +334,10 @@ } EXPECT_EQ(allocated, _storage.allocation_count()); - EXPECT_EQ(1u, list_length(active_list)); + EXPECT_EQ(1u, active_count(_storage)); EXPECT_EQ(1u, _storage.block_count()); EXPECT_TRUE(is_list_empty(allocate_list)); - const OopBlock& block = *active_list.chead(); + const OopBlock& block = *TestAccess::active_array(_storage).at(0); EXPECT_TRUE(TestAccess::block_is_full(block)); EXPECT_EQ(allocated, TestAccess::block_allocation_count(block)); @@ -336,19 +354,18 @@ static const size_t max_entries = 1000; oop* entries[max_entries]; - TestAccess::BlockList& active_list = TestAccess::active_list(_storage); - TestAccess::BlockList& allocate_list = TestAccess::allocate_list(_storage); + OopBlockList& allocate_list = TestAccess::allocate_list(_storage); EXPECT_EQ(0u, empty_block_count(_storage)); entries[0] = _storage.allocate(); ASSERT_TRUE(entries[0] != NULL); - EXPECT_EQ(1u, list_length(active_list)); + EXPECT_EQ(1u, active_count(_storage)); EXPECT_EQ(1u, _storage.block_count()); EXPECT_EQ(1u, list_length(allocate_list)); EXPECT_EQ(0u, empty_block_count(_storage)); - const OopBlock* block = active_list.chead(); + const OopBlock* block = TestAccess::active_array(_storage).at(0); EXPECT_EQ(1u, TestAccess::block_allocation_count(*block)); EXPECT_EQ(block, allocate_list.chead()); @@ -363,14 +380,14 @@ EXPECT_EQ(1u, list_length(allocate_list)); block = allocate_list.chead(); EXPECT_EQ(1u, TestAccess::block_allocation_count(*block)); - EXPECT_EQ(block, active_list.chead()); + EXPECT_EQ(block, active_head(_storage)); } else if (TestAccess::block_is_full(*block)) { EXPECT_TRUE(is_list_empty(allocate_list)); block = NULL; } else { EXPECT_FALSE(is_list_empty(allocate_list)); EXPECT_EQ(block, allocate_list.chead()); - EXPECT_EQ(block, active_list.chead()); + EXPECT_EQ(block, active_head(_storage)); } } @@ -378,20 +395,18 @@ EXPECT_NE(0u, TestAccess::block_allocation_count(*block)); EXPECT_FALSE(is_list_empty(allocate_list)); EXPECT_EQ(block, allocate_list.chead()); - EXPECT_EQ(block, active_list.chead()); + EXPECT_EQ(block, active_head(_storage)); } - size_t active_count = list_length(active_list); - for (size_t i = 0; i < max_entries; ++i) { release_entry(_storage, entries[i]); EXPECT_TRUE(is_allocate_list_sorted(_storage)); - EXPECT_EQ(max_entries - (i + 1), total_allocation_count(active_list)); + EXPECT_EQ(max_entries - (i + 1), total_allocation_count(_storage)); } - EXPECT_EQ(list_length(active_list), list_length(allocate_list)); - EXPECT_EQ(list_length(active_list), _storage.block_count()); - EXPECT_EQ(list_length(active_list), empty_block_count(_storage)); + EXPECT_EQ(active_count(_storage), list_length(allocate_list)); + EXPECT_EQ(active_count(_storage), _storage.block_count()); + EXPECT_EQ(active_count(_storage), empty_block_count(_storage)); for (const OopBlock* block = allocate_list.chead(); block != NULL; block = allocate_list.next(*block)) { @@ -405,10 +420,9 @@ EXPECT_EQ(0u, empty_block_count(_storage)); - TestAccess::BlockList& active_list = TestAccess::active_list(_storage); - TestAccess::BlockList& allocate_list = TestAccess::allocate_list(_storage); + OopBlockList& allocate_list = TestAccess::allocate_list(_storage); - EXPECT_EQ(_max_entries, total_allocation_count(active_list)); + EXPECT_EQ(_max_entries, total_allocation_count(_storage)); EXPECT_GE(1u, list_length(allocate_list)); // Release all entries in "random" order. @@ -418,14 +432,14 @@ release_entry(_storage, _entries[i]); _entries[i] = NULL; ++released; - EXPECT_EQ(_max_entries - released, total_allocation_count(active_list)); + EXPECT_EQ(_max_entries - released, total_allocation_count(_storage)); EXPECT_TRUE(is_allocate_list_sorted(_storage)); } } - EXPECT_EQ(list_length(active_list), list_length(allocate_list)); - EXPECT_EQ(list_length(active_list), _storage.block_count()); - EXPECT_EQ(0u, total_allocation_count(active_list)); + EXPECT_EQ(active_count(_storage), list_length(allocate_list)); + EXPECT_EQ(active_count(_storage), _storage.block_count()); + EXPECT_EQ(0u, total_allocation_count(_storage)); EXPECT_EQ(list_length(allocate_list), empty_block_count(_storage)); } @@ -436,10 +450,9 @@ EXPECT_EQ(0u, empty_block_count(_storage)); - TestAccess::BlockList& active_list = TestAccess::active_list(_storage); - TestAccess::BlockList& allocate_list = TestAccess::allocate_list(_storage); + OopBlockList& allocate_list = TestAccess::allocate_list(_storage); - EXPECT_EQ(_max_entries, total_allocation_count(active_list)); + EXPECT_EQ(_max_entries, total_allocation_count(_storage)); EXPECT_GE(1u, list_length(allocate_list)); // Release all entries in "random" order, "randomly" interspersed @@ -452,20 +465,20 @@ _entries[i] = NULL; ++released; ++total_released; - EXPECT_EQ(_max_entries - released, total_allocation_count(active_list)); + EXPECT_EQ(_max_entries - released, total_allocation_count(_storage)); EXPECT_TRUE(is_allocate_list_sorted(_storage)); if (total_released % allocate_step == 0) { _entries[i] = _storage.allocate(); --released; - EXPECT_EQ(_max_entries - released, total_allocation_count(active_list)); + EXPECT_EQ(_max_entries - released, total_allocation_count(_storage)); EXPECT_TRUE(is_allocate_list_sorted(_storage)); } } } - EXPECT_EQ(list_length(active_list), list_length(allocate_list)); - EXPECT_EQ(list_length(active_list), _storage.block_count()); - EXPECT_EQ(0u, total_allocation_count(active_list)); + EXPECT_EQ(active_count(_storage), list_length(allocate_list)); + EXPECT_EQ(active_count(_storage), _storage.block_count()); + EXPECT_EQ(0u, total_allocation_count(_storage)); EXPECT_EQ(list_length(allocate_list), empty_block_count(_storage)); } @@ -1015,9 +1028,7 @@ } TEST_VM_F(OopStorageTestWithAllocation, delete_empty_blocks_safepoint) { - TestAccess::BlockList& active_list = TestAccess::active_list(_storage); - - size_t initial_active_size = list_length(active_list); + size_t initial_active_size = active_count(_storage); EXPECT_EQ(initial_active_size, _storage.block_count()); ASSERT_LE(3u, initial_active_size); // Need at least 3 blocks for test @@ -1026,7 +1037,7 @@ release_entry(_storage, _entries[i]); } - EXPECT_EQ(initial_active_size, list_length(active_list)); + EXPECT_EQ(initial_active_size, active_count(_storage)); EXPECT_EQ(initial_active_size, _storage.block_count()); EXPECT_EQ(3u, empty_block_count(_storage)); @@ -1036,14 +1047,12 @@ VMThread::execute(&op); } EXPECT_EQ(0u, empty_block_count(_storage)); - EXPECT_EQ(initial_active_size - 3, list_length(active_list)); + EXPECT_EQ(initial_active_size - 3, active_count(_storage)); EXPECT_EQ(initial_active_size - 3, _storage.block_count()); } TEST_VM_F(OopStorageTestWithAllocation, delete_empty_blocks_concurrent) { - TestAccess::BlockList& active_list = TestAccess::active_list(_storage); - - size_t initial_active_size = list_length(active_list); + size_t initial_active_size = active_count(_storage); EXPECT_EQ(initial_active_size, _storage.block_count()); ASSERT_LE(3u, initial_active_size); // Need at least 3 blocks for test @@ -1052,13 +1061,13 @@ release_entry(_storage, _entries[i]); } - EXPECT_EQ(initial_active_size, list_length(active_list)); + EXPECT_EQ(initial_active_size, active_count(_storage)); EXPECT_EQ(initial_active_size, _storage.block_count()); EXPECT_EQ(3u, empty_block_count(_storage)); _storage.delete_empty_blocks_concurrent(); EXPECT_EQ(0u, empty_block_count(_storage)); - EXPECT_EQ(initial_active_size - 3, list_length(active_list)); + EXPECT_EQ(initial_active_size - 3, active_count(_storage)); EXPECT_EQ(initial_active_size - 3, _storage.block_count()); } @@ -1161,23 +1170,21 @@ #endif // !PRODUCT -////////////////////////////////////////////////////////////////////////////// -// Unit tests for block lists - -class OopStorageBlockListTest : public ::testing::Test { -public: - OopStorageBlockListTest() { +class OopStorageBlockCollectionTest : public ::testing::Test { +protected: + OopStorageBlockCollectionTest() { for (size_t i = 0; i < nvalues; ++i) { values[i] = OopBlock::new_block(pseudo_owner()); } } - ~OopStorageBlockListTest() { + ~OopStorageBlockCollectionTest() { for (size_t i = 0; i < nvalues; ++i) { OopBlock::delete_block(*values[i]); } } +public: static const size_t nvalues = 10; OopBlock* values[nvalues]; @@ -1190,11 +1197,13 @@ } }; -const size_t OopStorageBlockListTest::nvalues; -const void* const OopStorageBlockListTest::_pseudo_owner[] = {}; +const size_t OopStorageBlockCollectionTest::nvalues; +const void* const OopStorageBlockCollectionTest::_pseudo_owner[] = {}; + +class OopStorageBlockListTest : public OopStorageBlockCollectionTest {}; TEST_F(OopStorageBlockListTest, empty_list) { - TestAccess::BlockList list(&OopBlock::get_active_entry); + OopBlockList list(&OopBlock::get_allocate_entry); EXPECT_TRUE(is_list_empty(list)); EXPECT_EQ(NULL_BLOCK, list.head()); @@ -1203,7 +1212,7 @@ } TEST_F(OopStorageBlockListTest, push_back) { - TestAccess::BlockList list(&OopBlock::get_active_entry); + OopBlockList list(&OopBlock::get_allocate_entry); for (size_t i = 0; i < nvalues; ++i) { list.push_back(*values[i]); @@ -1233,7 +1242,7 @@ } TEST_F(OopStorageBlockListTest, push_front) { - TestAccess::BlockList list(&OopBlock::get_active_entry); + OopBlockList list(&OopBlock::get_allocate_entry); for (size_t i = 0; i < nvalues; ++i) { list.push_front(*values[i]); @@ -1264,7 +1273,7 @@ class OopStorageBlockListTestWithList : public OopStorageBlockListTest { public: - OopStorageBlockListTestWithList() : list(&OopBlock::get_active_entry) { + OopStorageBlockListTestWithList() : list(&OopBlock::get_allocate_entry) { for (size_t i = 0; i < nvalues; ++i) { list.push_back(*values[i]); } @@ -1274,7 +1283,7 @@ clear_list(list); } - TestAccess::BlockList list; + OopBlockList list; }; TEST_F(OopStorageBlockListTestWithList, unlink_front) { @@ -1336,7 +1345,7 @@ } TEST_F(OopStorageBlockListTest, single) { - TestAccess::BlockList list(&OopBlock::get_active_entry); + OopBlockList list(&OopBlock::get_allocate_entry); list.push_back(*values[0]); EXPECT_EQ(NULL_BLOCK, list.next(*values[0])); @@ -1351,31 +1360,79 @@ EXPECT_EQ(NULL_BLOCK, list.ctail()); } -TEST_F(OopStorageBlockListTestWithList, two_lists) { - TestAccess::BlockList list2(&OopBlock::get_allocate_entry); - for (size_t i = 0; i < nvalues; ++i) { - list2.push_front(*values[i]); +class OopStorageBlockArrayTest : public OopStorageBlockCollectionTest {}; + +TEST_F(OopStorageBlockArrayTest, empty_array) { + OopBlockArray* a = OopBlockArray::create(nvalues); + + EXPECT_EQ(nvalues, a->size()); + EXPECT_EQ(0u, a->block_count_acquire()); + TestAccess::block_array_set_block_count(a, 2); + EXPECT_EQ(2u, a->block_count_acquire()); + TestAccess::block_array_set_block_count(a, 0); + a->increment_refcount(); + a->increment_refcount(); + EXPECT_FALSE(a->decrement_refcount()); + EXPECT_TRUE(a->decrement_refcount()); + + OopBlockArray::destroy(a); +} + +TEST_F(OopStorageBlockArrayTest, push) { + OopBlockArray* a = OopBlockArray::create(nvalues - 1); + + for (size_t i = 0; i < nvalues - 1; ++i) { + EXPECT_TRUE(a->push(values[i])); + EXPECT_EQ(i + 1, a->block_count_acquire()); + EXPECT_EQ(values[i], a->at(i)); + } + EXPECT_FALSE(a->push(values[nvalues - 1])); + + TestAccess::block_array_set_block_count(a, 0); + OopBlockArray::destroy(a); +} + +class OopStorageBlockArrayTestWithArray : public OopStorageBlockArrayTest { +public: + OopStorageBlockArrayTestWithArray() : a(OopBlockArray::create(nvalues)) { + for (size_t i = 0; i < nvalues; ++i) { + a->push(values[i]); + } } - const OopBlock* active_block = list.chead(); - const OopBlock* allocate_block = list2.ctail(); - for (size_t i = 0; i < nvalues; ++i) { - EXPECT_EQ(active_block, allocate_block); - active_block = list.next(*active_block); - allocate_block = list2.prev(*allocate_block); + ~OopStorageBlockArrayTestWithArray() { + TestAccess::block_array_set_block_count(a, 0); + OopBlockArray::destroy(a); } - EXPECT_EQ(NULL_BLOCK, active_block); - EXPECT_EQ(NULL_BLOCK, allocate_block); + + OopBlockArray* a; +}; + +TEST_F(OopStorageBlockArrayTestWithArray, remove0) { + a->remove(values[0]); + EXPECT_EQ(nvalues - 1, a->block_count_acquire()); + EXPECT_EQ(values[nvalues - 1], a->at(0)); + for (size_t i = 1; i < nvalues - 1; ++i) { + EXPECT_EQ(values[i], a->at(i)); + } +} - for (size_t i = 0; i < nvalues; ++i) { - list2.unlink(*values[i]); +TEST_F(OopStorageBlockArrayTestWithArray, remove3) { + a->remove(values[3]); + EXPECT_EQ(nvalues - 1, a->block_count_acquire()); + for (size_t i = 0; i < 3; ++i) { + EXPECT_EQ(values[i], a->at(i)); } - EXPECT_TRUE(is_list_empty(list2)); + EXPECT_EQ(values[nvalues - 1], a->at(3)); + for (size_t i = 4; i < nvalues - 1; ++i) { + EXPECT_EQ(values[i], a->at(i)); + } +} - active_block = list.chead(); - for (size_t i = 0; i < nvalues; ++i) { - EXPECT_EQ(active_block, values[i]); - active_block = list.next(*active_block); +TEST_F(OopStorageBlockArrayTestWithArray, remove_last) { + a->remove(values[nvalues - 1]); + EXPECT_EQ(nvalues - 1, a->block_count_acquire()); + for (size_t i = 0; i < nvalues - 1; ++i) { + EXPECT_EQ(values[i], a->at(i)); } - EXPECT_EQ(NULL_BLOCK, active_block); } diff -r 19829b375d08 -r 9f758f0bb058 test/hotspot/gtest/gc/shared/test_oopStorage_parperf.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/hotspot/gtest/gc/shared/test_oopStorage_parperf.cpp Thu May 03 17:36:50 2018 -0400 @@ -0,0 +1,233 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#include "precompiled.hpp" +#include "gc/shared/oopStorage.inline.hpp" +#include "gc/shared/oopStorageParState.inline.hpp" +#include "gc/shared/workgroup.hpp" +#include "logging/log.hpp" +#include "logging/logConfiguration.hpp" +#include "memory/allocation.inline.hpp" +#include "memory/iterator.inline.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/os.hpp" +#include "runtime/thread.hpp" +#include "runtime/vm_operations.hpp" +#include "runtime/vmThread.hpp" +#include "utilities/debug.hpp" +#include "utilities/ostream.hpp" +#include "utilities/ticks.inline.hpp" + +#include "unittest.hpp" + +// This "test" doesn't really verify much. Rather, it's mostly a +// microbenchmark for OopStorage parallel iteration. It executes +// parallel iteration with varying numbers of threads on an storage +// object containing a large number of entries, and logs some stats +// about the distribution and performance of the iteration. + +// Parallel iteration not available unless INCLUDE_ALL_GCS +#if INCLUDE_ALL_GCS + +const uint _max_workers = 10; +static uint _num_workers = 0; +const size_t _storage_entries = 1000000; + +class OopStorageParIterPerf : public ::testing::Test { +public: + OopStorageParIterPerf(); + ~OopStorageParIterPerf(); + + WorkGang* workers() const; + + class VM_ParStateTime; + class Task; + class Closure; + + Tickspan run_task(Task* task, uint nthreads); + void show_task(const Task* task, Tickspan duration, uint nthreads); + void run_test(uint nthreads); + + static WorkGang* _workers; + + static const int _active_rank = Mutex::leaf - 1; + static const int _allocate_rank = Mutex::leaf; + + Mutex _allocate_mutex; + Mutex _active_mutex; + OopStorage _storage; + oop* _entries[_storage_entries]; +}; + +WorkGang* OopStorageParIterPerf::_workers = NULL; + +WorkGang* OopStorageParIterPerf::workers() const { + if (_workers == NULL) { + WorkGang* wg = new WorkGang("OopStorageParIterPerf workers", + _num_workers, + false, + false); + wg->initialize_workers(); + wg->update_active_workers(_num_workers); + _workers = wg; + } + return _workers; +} + +OopStorageParIterPerf::OopStorageParIterPerf() : + _allocate_mutex(_allocate_rank, + "test_OopStorage_parperf_allocate", + false, + Mutex::_safepoint_check_never), + _active_mutex(_active_rank, + "test_OopStorage_parperf_active", + false, + Mutex::_safepoint_check_never), + _storage("Test Storage", &_allocate_mutex, &_active_mutex) +{ + for (size_t i = 0; i < _storage_entries; ++i) { + _entries[i] = _storage.allocate(); + } + _num_workers = MIN2(_max_workers, (uint)os::processor_count()); +} + +OopStorageParIterPerf::~OopStorageParIterPerf() { + _storage.release(_entries, ARRAY_SIZE(_entries)); +} + +class OopStorageParIterPerf::VM_ParStateTime : public VM_GTestExecuteAtSafepoint { +public: + VM_ParStateTime(WorkGang* workers, AbstractGangTask* task, uint nthreads) : + _workers(workers), _task(task), _nthreads(nthreads) + {} + + void doit() { + _workers->run_task(_task, _nthreads); + } + +private: + WorkGang* _workers; + AbstractGangTask* _task; + uint _nthreads; +}; + +class OopStorageParIterPerf::Task : public AbstractGangTask { + typedef OopStorage::ParState StateType; + + Tickspan* _worker_times; + StateType _state; + OopClosure* _closure; + +public: + Task(OopStorage* storage, OopClosure* closure, uint nthreads) : + AbstractGangTask("OopStorageParIterPerf::Task"), + _worker_times(NULL), + _state(storage, nthreads), + _closure(closure) + { + Tickspan* wtimes = NEW_C_HEAP_ARRAY(Tickspan, _num_workers, mtInternal); + for (uint i = 0; i < _num_workers; ++i) { + new (&wtimes[i]) Tickspan(); + } + _worker_times = wtimes; + } + + ~Task() { + FREE_C_HEAP_ARRAY(Tickspan, _worker_times); + } + + virtual void work(uint worker_id) { + Ticks start_time = Ticks::now(); + _state.oops_do(_closure); + _worker_times[worker_id] = Ticks::now() - start_time; + } + + const Tickspan* worker_times() const { return _worker_times; } +}; + +class OopStorageParIterPerf::Closure : public OopClosure { +public: + virtual void do_oop(oop* p) { guarantee(*p == NULL, "expected NULL"); } + virtual void do_oop(narrowOop* p) { ShouldNotReachHere(); } +}; + +Tickspan OopStorageParIterPerf::run_task(Task* task, uint nthreads) { + tty->print_cr("Running test with %u threads", nthreads); + VM_ParStateTime op(workers(), task, nthreads); + ThreadInVMfromNative invm(JavaThread::current()); + Ticks start_time = Ticks::now(); + VMThread::execute(&op); + return Ticks::now() - start_time; +} + +void OopStorageParIterPerf::show_task(const Task* task, Tickspan duration, uint nthreads) { + tty->print_cr("Run test with %u threads: " JLONG_FORMAT, nthreads, duration.value()); + const Tickspan* wtimes = task->worker_times(); + for (uint i = 0; i < _num_workers; ++i) { + if (wtimes[i] != Tickspan()) { + tty->print_cr(" %u: " JLONG_FORMAT, i, wtimes[i].value()); + } + } + tty->cr(); +} + +void OopStorageParIterPerf::run_test(uint nthreads) { + if (nthreads <= _num_workers) { + SCOPED_TRACE(err_msg("Running test with %u threads", nthreads).buffer()); + Closure closure; + Task task(&_storage, &closure, nthreads); + Tickspan t = run_task(&task, nthreads); + show_task(&task, t, nthreads); + } +} + +TEST_VM_F(OopStorageParIterPerf, test) { + // Enable additional interesting logging. +#define TEST_TAGS oopstorage, blocks, stats + // There isn't an obvious way to capture the old log level so it + // can be restored here, so just use Warning as the "default". + LogLevelType old_level = LogLevel::Warning; + if (log_is_enabled(Debug, TEST_TAGS)) { + old_level = LogLevel::Debug; + } else if (log_is_enabled(Info, TEST_TAGS)) { + old_level = LogLevel::Info; + } + bool debug_enabled = old_level == LogLevel::Debug; + if (!debug_enabled) { + LogConfiguration::configure_stdout(LogLevel::Debug, true, LOG_TAGS(TEST_TAGS)); + } + + run_test(1); + run_test(2); + run_test(3); + run_test(4); + run_test(6); + run_test(8); + run_test(10); + + if (!debug_enabled) { + LogConfiguration::configure_stdout(old_level, true, LOG_TAGS(TEST_TAGS)); + } +} + +#endif // INCLUDE_ALL_GCS