# HG changeset patch
# User tschatzl
# Date 1473950659 -7200
# Node ID ff9f64534cffcb9a5c6acbced21095645a131f22
# Parent 68d3c7c391532f693e966afd247c914e8acd9142
8159422: Very high Concurrent Mark mark stack contention
Summary: Decrease contention on mark stack by splitting locks, and minimizing the amount of time these locks are held. Improve mark stack management.
Reviewed-by: kbarrett, mgerdin, eosterlund

diff -r 68d3c7c39153 -r ff9f64534cff hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp
--- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp Wed Sep 14 16:20:54 2016 +0300
+++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp Thu Sep 15 16:44:19 2016 +0200
@@ -3165,7 +3165,6 @@
 
         assert(_verifier->check_cset_fast_test(), "Inconsistency in the InCSetState table.");
 
-        _cm->note_start_of_gc();
         // We call this after finalize_cset() to
         // ensure that the CSet has been finalized.
         _cm->verify_no_cset_oops();
@@ -3251,7 +3250,6 @@
         // We redo the verification but now wrt to the new CSet which
         // has just got initialized after the previous CSet was freed.
         _cm->verify_no_cset_oops();
-        _cm->note_end_of_gc();
 
         // This timing is only used by the ergonomics to handle our pause target.
         // It is unclear why this should not include the full pause. We will
diff -r 68d3c7c39153 -r ff9f64534cff hotspot/src/share/vm/gc/g1/g1ConcurrentMark.cpp
--- a/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.cpp Wed Sep 14 16:20:54 2016 +0300
+++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.cpp Thu Sep 15 16:44:19 2016 +0200
@@ -133,129 +133,184 @@
 }
 
 G1CMMarkStack::G1CMMarkStack() :
-  _reserved_space(),
+  _max_chunk_capacity(0),
   _base(NULL),
-  _capacity(0),
-  _saved_index((size_t)AllBits),
+  _chunk_capacity(0),
+  _out_of_memory(false),
   _should_expand(false) {
   set_empty();
 }
 
 bool G1CMMarkStack::resize(size_t new_capacity) {
   assert(is_empty(), "Only resize when stack is empty.");
-  assert(new_capacity <= MarkStackSizeMax,
-         "Trying to resize stack to " SIZE_FORMAT " elements when the maximum is " SIZE_FORMAT, new_capacity, MarkStackSizeMax);
-
-  size_t reservation_size = ReservedSpace::allocation_align_size_up(new_capacity * sizeof(oop));
-
-  ReservedSpace rs(reservation_size);
-  if (!rs.is_reserved()) {
-    log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " elements and size " SIZE_FORMAT "B.", new_capacity, reservation_size);
+  assert(new_capacity <= _max_chunk_capacity,
+         "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity);
+
+  OopChunk* new_base = MmapArrayAllocator<OopChunk, mtGC>::allocate_or_null(new_capacity);
+
+  if (new_base == NULL) {
+    log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(OopChunk));
     return false;
   }
-
-  VirtualSpace vs;
-
-  if (!vs.initialize(rs, rs.size())) {
-    rs.release();
-    log_warning(gc)("Failed to commit memory for new overflow mark stack of size " SIZE_FORMAT "B.", rs.size());
-    return false;
+  // Release old mapping.
+  if (_base != NULL) {
+    MmapArrayAllocator<OopChunk, mtGC>::free(_base, _chunk_capacity);
   }
-  assert(vs.committed_size() == rs.size(), "Failed to commit all of the mark stack.");
-
-  // Release old mapping.
-  _reserved_space.release();
-
-  // Save new mapping for future unmapping.
-  _reserved_space = rs;
-
-  MemTracker::record_virtual_memory_type((address)_reserved_space.base(), mtGC);
-
-  _base = (oop*) vs.low();
-  _capacity = new_capacity;
+  _base = new_base;
+  _chunk_capacity = new_capacity;
   set_empty();
   _should_expand = false;
 
   return true;
 }
 
-bool G1CMMarkStack::allocate(size_t capacity) {
-  return resize(capacity);
+size_t G1CMMarkStack::capacity_alignment() {
+  return (size_t)lcm(os::vm_allocation_granularity(), sizeof(OopChunk)) / sizeof(void*);
+}
+
+bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) {
+  guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized.");
+
+  size_t const OopChunkSizeInVoidStar = sizeof(OopChunk) / sizeof(void*);
+
+  _max_chunk_capacity = (size_t)align_size_up(max_capacity, capacity_alignment()) / OopChunkSizeInVoidStar;
+  size_t initial_chunk_capacity = (size_t)align_size_up(initial_capacity, capacity_alignment()) / OopChunkSizeInVoidStar;
+
+  guarantee(initial_chunk_capacity <= _max_chunk_capacity,
+            "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT,
+            _max_chunk_capacity,
+            initial_chunk_capacity);
+
+  log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT,
+                initial_chunk_capacity, _max_chunk_capacity);
+
+  return resize(initial_chunk_capacity);
 }
 
 void G1CMMarkStack::expand() {
   // Clear expansion flag
   _should_expand = false;
-  if (_capacity == MarkStackSizeMax) {
-    log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " elements.", _capacity);
+  if (_chunk_capacity == _max_chunk_capacity) {
+    log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity);
     return;
   }
-  size_t old_capacity = _capacity;
+  size_t old_capacity = _chunk_capacity;
   // Double capacity if possible
-  size_t new_capacity = MIN2(old_capacity * 2, MarkStackSizeMax);
+  size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity);
 
   if (resize(new_capacity)) {
-    log_debug(gc)("Expanded marking stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " elements",
+    log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
                   old_capacity, new_capacity);
   } else {
-    log_warning(gc)("Failed to expand marking stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " elements",
+    log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
                     old_capacity, new_capacity);
   }
 }
 
 G1CMMarkStack::~G1CMMarkStack() {
   if (_base != NULL) {
-    _base = NULL;
-    _reserved_space.release();
-  }
-}
-
-void G1CMMarkStack::par_push_arr(oop* buffer, size_t n) {
-  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
-  size_t start = _index;
-  size_t next_index = start + n;
-  if (next_index > _capacity) {
-    _overflow = true;
-    return;
-  }
-  // Otherwise.
-  _index = next_index;
-  for (size_t i = 0; i < n; i++) {
-    size_t ind = start + i;
-    assert(ind < _capacity, "By overflow test above.");
-    _base[ind] = buffer[i];
+    MmapArrayAllocator<OopChunk, mtGC>::free(_base, _chunk_capacity);
   }
 }
 
-bool G1CMMarkStack::par_pop_arr(oop* buffer, size_t max, size_t* n) {
-  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
-  size_t index = _index;
-  if (index == 0) {
-    *n = 0;
+void G1CMMarkStack::add_chunk_to_list(OopChunk* volatile* list, OopChunk* elem) {
+  elem->next = *list;
+  *list = elem;
+}
+
+void G1CMMarkStack::add_chunk_to_chunk_list(OopChunk* elem) {
+  MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
+  add_chunk_to_list(&_chunk_list, elem);
+  _chunks_in_chunk_list++;
+}
+
+void G1CMMarkStack::add_chunk_to_free_list(OopChunk* elem) {
+  MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
+  add_chunk_to_list(&_free_list, elem);
+}
+
+G1CMMarkStack::OopChunk* G1CMMarkStack::remove_chunk_from_list(OopChunk* volatile* list) {
+  OopChunk* result = *list;
+  if (result != NULL) {
+    *list = (*list)->next;
+  }
+  return result;
+}
+
+G1CMMarkStack::OopChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
+  MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
+  OopChunk* result = remove_chunk_from_list(&_chunk_list);
+  if (result != NULL) {
+    _chunks_in_chunk_list--;
+  }
+  return result;
+}
+
+G1CMMarkStack::OopChunk* G1CMMarkStack::remove_chunk_from_free_list() {
+  MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
+  return remove_chunk_from_list(&_free_list);
+}
+
+G1CMMarkStack::OopChunk* G1CMMarkStack::allocate_new_chunk() {
+  // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code.
+  // Further this limits _hwm to a value of _chunk_capacity + #threads, avoiding
+  // wraparound of _hwm.
+  if (_hwm >= _chunk_capacity) {
+    return NULL;
+  }
+
+  size_t cur_idx = Atomic::add(1, &_hwm) - 1;
+  if (cur_idx >= _chunk_capacity) {
+    return NULL;
+  }
+
+  OopChunk* result = ::new (&_base[cur_idx]) OopChunk;
+  result->next = NULL;
+  return result;
+}
+
+bool G1CMMarkStack::par_push_chunk(oop* ptr_arr) {
+  // Get a new chunk.
+  OopChunk* new_chunk = remove_chunk_from_free_list();
+
+  if (new_chunk == NULL) {
+    // Did not get a chunk from the free list. Allocate from backing memory.
+    new_chunk = allocate_new_chunk();
+  }
+
+  if (new_chunk == NULL) {
+    _out_of_memory = true;
     return false;
-  } else {
-    size_t k = MIN2(max, index);
-    size_t new_ind = index - k;
-    for (size_t j = 0; j < k; j++) {
-      buffer[j] = _base[new_ind + j];
-    }
-    _index = new_ind;
-    *n = k;
-    return true;
   }
+
+  Copy::conjoint_oops_atomic(ptr_arr, new_chunk->data, OopsPerChunk);
+
+  add_chunk_to_chunk_list(new_chunk);
+
+  return true;
 }
 
-void G1CMMarkStack::note_start_of_gc() {
-  assert(_saved_index == (size_t)AllBits, "note_start_of_gc()/end_of_gc() calls bracketed incorrectly");
-  _saved_index = _index;
+bool G1CMMarkStack::par_pop_chunk(oop* ptr_arr) {
+  OopChunk* cur = remove_chunk_from_chunk_list();
+
+  if (cur == NULL) {
+    return false;
+  }
+
+  Copy::conjoint_oops_atomic(cur->data, ptr_arr, OopsPerChunk);
+
+  add_chunk_to_free_list(cur);
+  return true;
 }
 
-void G1CMMarkStack::note_end_of_gc() {
-  guarantee(!stack_modified(), "Saved index " SIZE_FORMAT " must be the same as " SIZE_FORMAT, _saved_index, _index);
-
-  _saved_index = (size_t)AllBits;
+void G1CMMarkStack::set_empty() {
+  _chunks_in_chunk_list = 0;
+  _hwm = 0;
+  clear_out_of_memory();
+  _chunk_list = NULL;
+  _free_list = NULL;
 }
 
 G1CMRootRegions::G1CMRootRegions() :
@@ -483,9 +538,8 @@
     }
   }
 
-  if (!_global_mark_stack.allocate(MarkStackSize)) {
+  if (!_global_mark_stack.initialize(MarkStackSize, MarkStackSizeMax)) {
     vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
-    return;
   }
 
   _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_worker_id, mtGC);
@@ -1695,10 +1749,10 @@
 
   // oop closures will set the has_overflown flag if we overflow the
   // global marking stack.
-  assert(_global_mark_stack.overflow() || _global_mark_stack.is_empty(),
-         "mark stack should be empty (unless it overflowed)");
-
-  if (_global_mark_stack.overflow()) {
+  assert(_global_mark_stack.is_out_of_memory() || _global_mark_stack.is_empty(),
+         "Mark stack should be empty (unless it is out of memory)");
+
+  if (_global_mark_stack.is_out_of_memory()) {
     // This should have been done already when we tried to push an
     // entry on to the global mark stack. But let's do it again.
     set_has_overflown();
@@ -2343,49 +2397,54 @@
 }
 
 void G1CMTask::move_entries_to_global_stack() {
-  // local array where we'll store the entries that will be popped
-  // from the local queue
-  oop buffer[global_stack_transfer_size];
-
-  int n = 0;
+  // Local array where we'll store the entries that will be popped
+  // from the local queue.
+  oop buffer[G1CMMarkStack::OopsPerChunk];
+
+  size_t n = 0;
   oop obj;
-  while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
+  while (n < G1CMMarkStack::OopsPerChunk && _task_queue->pop_local(obj)) {
     buffer[n] = obj;
     ++n;
   }
+  if (n < G1CMMarkStack::OopsPerChunk) {
+    buffer[n] = NULL;
+  }
 
   if (n > 0) {
-    // we popped at least one entry from the local queue
-
-    if (!_cm->mark_stack_push(buffer, n)) {
+    if (!_cm->mark_stack_push(buffer)) {
       set_has_aborted();
     }
   }
 
-  // this operation was quite expensive, so decrease the limits
+  // This operation was quite expensive, so decrease the limits.
  decrease_limits();
 }
 
-void G1CMTask::get_entries_from_global_stack() {
-  // local array where we'll store the entries that will be popped
+bool G1CMTask::get_entries_from_global_stack() {
+  // Local array where we'll store the entries that will be popped
   // from the global stack.
-  oop buffer[global_stack_transfer_size];
-  size_t n;
-  _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
-  assert(n <= global_stack_transfer_size,
-         "we should not pop more than the given limit");
-  if (n > 0) {
-    // yes, we did actually pop at least one entry
-    for (size_t i = 0; i < n; ++i) {
-      bool success = _task_queue->push(buffer[i]);
-      // We only call this when the local queue is empty or under a
-      // given target limit. So, we do not expect this push to fail.
-      assert(success, "invariant");
+  oop buffer[G1CMMarkStack::OopsPerChunk];
+
+  if (!_cm->mark_stack_pop(buffer)) {
+    return false;
+  }
+
+  // We did actually pop at least one entry.
+  for (size_t i = 0; i < G1CMMarkStack::OopsPerChunk; ++i) {
+    oop elem = buffer[i];
+    if (elem == NULL) {
+      break;
    }
+    bool success = _task_queue->push(elem);
+    // We only call this when the local queue is empty or under a
+    // given target limit. So, we do not expect this push to fail.
+    assert(success, "invariant");
  }
 
-  // this operation was quite expensive, so decrease the limits
+  // This operation was quite expensive, so decrease the limits.
   decrease_limits();
+  return true;
 }
 
 void G1CMTask::drain_local_queue(bool partially) {
@@ -2429,20 +2488,21 @@
 
   // Decide what the target size is, depending whether we're going to
   // drain it partially (so that other tasks can steal if they run out
-  // of things to do) or totally (at the very end). Notice that,
-  // because we move entries from the global stack in chunks or
-  // because another task might be doing the same, we might in fact
-  // drop below the target. But, this is not a problem.
-  size_t target_size;
+  // of things to do) or totally (at the very end).
+  // Notice that when draining the global mark stack partially, due to the raciness
+  // of the mark stack size update we might in fact drop below the target. But,
+  // this is not a problem.
+  // In case of total draining, we simply process until the global mark stack is
+  // totally empty, disregarding the size counter.
   if (partially) {
-    target_size = _cm->partial_mark_stack_size_target();
+    size_t const target_size = _cm->partial_mark_stack_size_target();
+    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
+      if (get_entries_from_global_stack()) {
+        drain_local_queue(partially);
+      }
+    }
   } else {
-    target_size = 0;
-  }
-
-  if (_cm->mark_stack_size() > target_size) {
-    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
-      get_entries_from_global_stack();
+    while (!has_aborted() && get_entries_from_global_stack()) {
       drain_local_queue(partially);
     }
   }
diff -r 68d3c7c39153 -r ff9f64534cff hotspot/src/share/vm/gc/g1/g1ConcurrentMark.hpp
--- a/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.hpp Wed Sep 14 16:20:54 2016 +0300
+++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.hpp Thu Sep 15 16:44:19 2016 +0200
@@ -149,42 +149,98 @@
 //
 // Stores oops in a huge buffer in virtual memory that is always fully committed.
 // Resizing may only happen during a STW pause when the stack is empty.
+//
+// Memory is allocated on a "chunk" basis, i.e. a set of oops. For this, the mark
+// stack memory is split into evenly sized chunks of oops. Users can only
+// add or remove entries on that basis.
+// Chunks are filled in increasing address order. Chunks that are not completely
+// filled are terminated with a NULL element.
+//
+// Every chunk has a header containing a single pointer element used for memory
+// management. This wastes some space, but is negligible (< .1% with current sizing).
+//
+// Memory management is done using a mix of tracking a high water-mark indicating
+// that all chunks at a lower address are valid chunks, and a singly linked free
+// list connecting all empty chunks.
 class G1CMMarkStack VALUE_OBJ_CLASS_SPEC {
-  ReservedSpace _reserved_space; // Space currently reserved for the mark stack.
+public:
+  // Number of oops that can fit in a single chunk.
+  static const size_t OopsPerChunk = 1024 - 1 /* One reference for the next pointer */;
+private:
+  struct OopChunk {
+    OopChunk* next;
+    oop data[OopsPerChunk];
+  };
+
+  size_t _max_chunk_capacity;    // Maximum number of OopChunk elements on the stack.
+
+  OopChunk* _base;               // Bottom address of allocated memory area.
+  size_t _chunk_capacity;        // Current maximum number of OopChunk elements.
 
-  oop* _base;                    // Bottom address of allocated memory area.
-  size_t _capacity;              // Maximum number of elements.
-  size_t _index;                 // One more than last occupied index.
+  char _pad0[DEFAULT_CACHE_LINE_SIZE];
+  OopChunk* volatile _free_list;  // Linked list of free chunks that can be allocated by users.
+  char _pad1[DEFAULT_CACHE_LINE_SIZE - sizeof(OopChunk*)];
+  OopChunk* volatile _chunk_list; // List of chunks currently containing data.
+  volatile size_t _chunks_in_chunk_list;
+  char _pad2[DEFAULT_CACHE_LINE_SIZE - sizeof(OopChunk*) - sizeof(size_t)];
+
+  volatile size_t _hwm;          // High water mark within the reserved space.
+  char _pad4[DEFAULT_CACHE_LINE_SIZE - sizeof(size_t)];
+
+  // Allocate a new chunk from the reserved memory, using the high water mark. Returns
+  // NULL if out of memory.
+  OopChunk* allocate_new_chunk();
 
-  size_t _saved_index;           // Value of _index saved at start of GC to detect mark stack modifications during that time.
+  volatile bool _out_of_memory;
 
-  bool _overflow;
+  // Atomically add the given chunk to the list.
+  void add_chunk_to_list(OopChunk* volatile* list, OopChunk* elem);
+  // Atomically remove and return a chunk from the given list. Returns NULL if the
+  // list is empty.
+  OopChunk* remove_chunk_from_list(OopChunk* volatile* list);
+
+  void add_chunk_to_chunk_list(OopChunk* elem);
+  void add_chunk_to_free_list(OopChunk* elem);
+
+  OopChunk* remove_chunk_from_chunk_list();
+  OopChunk* remove_chunk_from_free_list();
+
   bool _should_expand;
 
   // Resizes the mark stack to the given new capacity. Releases any previous
   // memory if successful.
   bool resize(size_t new_capacity);
 
-  bool stack_modified() const { return _index != _saved_index; }
 public:
   G1CMMarkStack();
   ~G1CMMarkStack();
 
-  bool allocate(size_t capacity);
+  // Alignment and minimum capacity of this mark stack in number of oops.
+  static size_t capacity_alignment();
+
+  // Allocate and initialize the mark stack with the given number of oops.
+  bool initialize(size_t initial_capacity, size_t max_capacity);
 
-  // Pushes the first "n" elements of the given buffer on the stack.
-  void par_push_arr(oop* buffer, size_t n);
+  // Pushes the given buffer containing at most OopsPerChunk elements on the mark
+  // stack. If fewer than OopsPerChunk elements are to be pushed, the array must
+  // be terminated with a NULL.
+  // Returns whether the buffer contents were successfully pushed to the global mark
+  // stack.
+  bool par_push_chunk(oop* buffer);
 
-  // Moves up to max elements from the stack into the given buffer. Returns
-  // the number of elements pushed, and false if the array has been empty.
-  // Returns true if the buffer contains at least one element.
-  bool par_pop_arr(oop* buffer, size_t max, size_t* n);
+  // Pops a chunk from this mark stack, copying its contents into the given buffer.
+  // The chunk may contain up to OopsPerChunk elements. If there are fewer, the last
+  // element in the array is a NULL pointer.
+  bool par_pop_chunk(oop* buffer);
 
-  bool is_empty() const { return _index == 0; }
-  size_t capacity() const  { return _capacity; }
+  // Return whether the chunk list is empty. Racy due to unsynchronized access to
+  // _chunk_list.
+  bool is_empty() const { return _chunk_list == NULL; }
 
-  bool overflow() const { return _overflow; }
-  void clear_overflow() { _overflow = false; }
+  size_t capacity() const  { return _chunk_capacity; }
+
+  bool is_out_of_memory() const { return _out_of_memory; }
+  void clear_out_of_memory() { _out_of_memory = false; }
 
   bool should_expand() const { return _should_expand; }
   void set_should_expand(bool value) { _should_expand = value; }
 
@@ -192,20 +248,15 @@
   // Expand the stack, typically in response to an overflow condition
   void expand();
 
-  size_t size() const { return _index; }
-
-  void set_empty() { _index = 0; clear_overflow(); }
-
-  // Record the current index.
-  void note_start_of_gc();
+  // Return the approximate number of oops on this mark stack. Racy due to
+  // unsynchronized access to _chunks_in_chunk_list.
+  size_t size() const { return _chunks_in_chunk_list * OopsPerChunk; }
 
-  // Make sure that we have not added any entries to the stack during GC.
-  void note_end_of_gc();
+  void set_empty();
 
-  // Apply fn to each oop in the mark stack, up to the bound recorded
-  // via one of the above "note" functions. The mark stack must not
+  // Apply Fn to every oop on the mark stack. The mark stack must not
   // be modified while iterating.
-  template<typename Fn> void iterate(Fn fn);
+  template<typename Fn> void iterate(Fn fn) const PRODUCT_RETURN;
 };
 
 // Root Regions are regions that are not empty at the beginning of a
@@ -278,7 +329,6 @@
   friend class G1CMDrainMarkingStackClosure;
   friend class G1CMBitMapClosure;
   friend class G1CMConcurrentMarkingTask;
-  friend class G1CMMarkStack;
   friend class G1CMRemarkTask;
   friend class G1CMTask;
 
@@ -479,22 +529,20 @@
 public:
   // Manipulation of the global mark stack.
   // The push and pop operations are used by tasks for transfers
-  // between task-local queues and the global mark stack, and use
-  // locking for concurrency safety.
-  bool mark_stack_push(oop* arr, size_t n) {
-    _global_mark_stack.par_push_arr(arr, n);
-    if (_global_mark_stack.overflow()) {
+  // between task-local queues and the global mark stack.
+  bool mark_stack_push(oop* arr) {
+    if (!_global_mark_stack.par_push_chunk(arr)) {
       set_has_overflown();
       return false;
     }
     return true;
   }
-  void mark_stack_pop(oop* arr, size_t max, size_t* n) {
-    _global_mark_stack.par_pop_arr(arr, max, n);
+  bool mark_stack_pop(oop* arr) {
+    return _global_mark_stack.par_pop_chunk(arr);
   }
   size_t mark_stack_size() { return _global_mark_stack.size(); }
   size_t partial_mark_stack_size_target() { return _global_mark_stack.capacity()/3; }
-  bool mark_stack_overflow() { return _global_mark_stack.overflow(); }
+  bool mark_stack_overflow() { return _global_mark_stack.is_out_of_memory(); }
   bool mark_stack_empty() { return _global_mark_stack.is_empty(); }
 
   G1CMRootRegions* root_regions() { return &_root_regions; }
@@ -599,16 +647,6 @@
   // read-only, so use this carefully!
   void clearRangePrevBitmap(MemRegion mr);
 
-  // Notify data structures that a GC has started.
-  void note_start_of_gc() {
-    _global_mark_stack.note_start_of_gc();
-  }
-
-  // Notify data structures that a GC is finished.
-  void note_end_of_gc() {
-    _global_mark_stack.note_end_of_gc();
-  }
-
   // Verify that there are no CSet oops on the stacks (taskqueues /
   // global mark stack) and fingers (global / per-task).
   // If marking is not in progress, it's a no-op.
@@ -670,10 +708,7 @@
     // references reaches this limit
     refs_reached_period = 384,
     // Initial value for the hash seed, used in the work stealing code
-    init_hash_seed = 17,
-    // How many entries will be transferred between global stack and
-    // local queues at once.
-    global_stack_transfer_size = 1024
+    init_hash_seed = 17
   };
 
   uint _worker_id;
@@ -858,9 +893,10 @@
   // It pushes an object on the local queue.
   inline void push(oop obj);
 
-  // These two move entries to/from the global stack.
+  // Move entries to the global stack.
   void move_entries_to_global_stack();
-  void get_entries_from_global_stack();
+  // Move entries from the global stack. Returns true if any entries were moved.
+  bool get_entries_from_global_stack();
 
   // It pops and scans objects from the local queue. If partially is
   // true, then it stops when the queue size is of a given limit. If
diff -r 68d3c7c39153 -r ff9f64534cff hotspot/src/share/vm/gc/g1/g1ConcurrentMark.inline.hpp
--- a/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.inline.hpp Wed Sep 14 16:20:54 2016 +0300
+++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.inline.hpp Thu Sep 15 16:44:19 2016 +0200
@@ -89,14 +89,28 @@
 
 #undef check_mark
 
+#ifndef PRODUCT
 template<typename Fn>
-inline void G1CMMarkStack::iterate(Fn fn) {
+inline void G1CMMarkStack::iterate(Fn fn) const {
   assert_at_safepoint(true);
-  assert(!stack_modified(), "Saved index " SIZE_FORMAT " must be the same as " SIZE_FORMAT, _saved_index, _index);
-  for (size_t i = 0; i < _index; ++i) {
-    fn(_base[i]);
+
+  size_t num_chunks = 0;
+
+  OopChunk* cur = _chunk_list;
+  while (cur != NULL) {
+    guarantee(num_chunks <= _chunks_in_chunk_list, "Found " SIZE_FORMAT " oop chunks which is more than there should be", num_chunks);
+
+    for (size_t i = 0; i < OopsPerChunk; ++i) {
+      if (cur->data[i] == NULL) {
+        break;
+      }
+      fn(cur->data[i]);
+    }
+    cur = cur->next;
+    num_chunks++;
   }
 }
+#endif
 
 // It scans an object and visits its children.
 inline void G1CMTask::scan_object(oop obj) { process_grey_object<true>(obj); }
 
diff -r 68d3c7c39153 -r ff9f64534cff hotspot/src/share/vm/gc/g1/g1OopClosures.hpp
--- a/hotspot/src/share/vm/gc/g1/g1OopClosures.hpp Wed Sep 14 16:20:54 2016 +0300
+++ b/hotspot/src/share/vm/gc/g1/g1OopClosures.hpp Thu Sep 15 16:44:19 2016 +0200
@@ -34,7 +34,6 @@
 class G1ConcurrentMark;
 class DirtyCardToOopClosure;
 class G1CMBitMap;
-class G1CMMarkStack;
 class G1ParScanThreadState;
 class G1CMTask;
 class ReferenceProcessor;
diff -r 68d3c7c39153 -r ff9f64534cff hotspot/src/share/vm/memory/allocation.hpp
--- a/hotspot/src/share/vm/memory/allocation.hpp Wed Sep 14 16:20:54 2016 +0300
+++ b/hotspot/src/share/vm/memory/allocation.hpp Thu Sep 15 16:44:19 2016 +0200
@@ -738,6 +738,7 @@
   static size_t size_for(size_t length);
 
  public:
+  static E* allocate_or_null(size_t length);
   static E* allocate(size_t length);
   static void free(E* addr, size_t length);
 };
diff -r 68d3c7c39153 -r ff9f64534cff hotspot/src/share/vm/memory/allocation.inline.hpp
--- a/hotspot/src/share/vm/memory/allocation.inline.hpp Wed Sep 14 16:20:54 2016 +0300
+++ b/hotspot/src/share/vm/memory/allocation.inline.hpp Thu Sep 15 16:44:19 2016 +0200
@@ -153,6 +153,24 @@
 }
 
 template <class E, MEMFLAGS F>
+E* MmapArrayAllocator<E, F>::allocate_or_null(size_t length) {
+  size_t size = size_for(length);
+  int alignment = os::vm_allocation_granularity();
+
+  char* addr = os::reserve_memory(size, NULL, alignment, F);
+  if (addr == NULL) {
+    return NULL;
+  }
+
+  if (os::commit_memory(addr, size, !ExecMem, "Allocator (commit)")) {
+    return (E*)addr;
+  } else {
+    os::release_memory(addr, size);
+    return NULL;
+  }
+}
+
+template <class E, MEMFLAGS F>
 E* MmapArrayAllocator<E, F>::allocate(size_t length) {
   size_t size = size_for(length);
   int alignment = os::vm_allocation_granularity();
diff -r 68d3c7c39153 -r ff9f64534cff hotspot/src/share/vm/runtime/mutexLocker.cpp
--- a/hotspot/src/share/vm/runtime/mutexLocker.cpp Wed Sep 14 16:20:54 2016 +0300
+++ b/hotspot/src/share/vm/runtime/mutexLocker.cpp Thu Sep 15 16:44:19 2016 +0200
@@ -77,6 +77,8 @@
 Mutex*   DirtyCardQ_FL_lock            = NULL;
 Monitor* DirtyCardQ_CBL_mon            = NULL;
 Mutex*   Shared_DirtyCardQ_lock        = NULL;
+Mutex*   MarkStackFreeList_lock        = NULL;
+Mutex*   MarkStackChunkList_lock       = NULL;
 Mutex*   ParGCRareEvent_lock           = NULL;
 Mutex*   DerivedPointerTableGC_lock    = NULL;
 Mutex*   Compile_lock                  = NULL;
@@ -194,6 +196,9 @@
 
     def(StringDedupQueue_lock      , Monitor, leaf,        true,  Monitor::_safepoint_check_never);
     def(StringDedupTable_lock      , Mutex  , leaf,        true,  Monitor::_safepoint_check_never);
+
+    def(MarkStackFreeList_lock     , Mutex  , leaf     ,   true,  Monitor::_safepoint_check_never);
+    def(MarkStackChunkList_lock    , Mutex  , leaf     ,   true,  Monitor::_safepoint_check_never);
   }
   def(ParGCRareEvent_lock          , Mutex  , leaf     ,   true,  Monitor::_safepoint_check_sometimes);
   def(DerivedPointerTableGC_lock   , Mutex,   leaf,        true,  Monitor::_safepoint_check_never);
diff -r 68d3c7c39153 -r ff9f64534cff hotspot/src/share/vm/runtime/mutexLocker.hpp
--- a/hotspot/src/share/vm/runtime/mutexLocker.hpp Wed Sep 14 16:20:54 2016 +0300
+++ b/hotspot/src/share/vm/runtime/mutexLocker.hpp Thu Sep 15 16:44:19 2016 +0200
@@ -81,7 +81,8 @@
 extern Mutex*   Shared_DirtyCardQ_lock;          // Lock protecting dirty card
                                                  // queue shared by
                                                  // non-Java threads.
-                                                 // (see option ExplicitGCInvokesConcurrent)
+extern Mutex*   MarkStackFreeList_lock;          // Protects access to the global mark stack free list.
+extern Mutex*   MarkStackChunkList_lock;         // Protects access to the global mark stack chunk list.
 extern Mutex*   ParGCRareEvent_lock;             // Synchronizes various (rare) parallel GC ops.
 extern Mutex*   Compile_lock;                    // a lock held when Compilation is updating code (used to block CodeCache traversal, CHA updates, etc)
 extern Monitor* MethodCompileQueue_lock;         // a lock held when method compilations are enqueued, dequeued
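
Note on the design (illustrative, not part of the patch): the change replaces the single contiguous, ParGCRareEvent_lock-guarded mark stack with a chunked stack. Chunks come from a preallocated backing array via an atomically bumped high water mark and are recycled through a free list; full chunks sit on a second list. Each list has its own lock, and the bulk copy of a chunk's oops happens outside any lock. Below is a minimal, self-contained C++ sketch of the same scheme, using std::mutex and std::atomic in place of HotSpot's MutexLockerEx and Atomic::add, void* in place of oop, and a plain new[] array in place of the mmap-backed allocator:

  #include <atomic>
  #include <cstddef>
  #include <mutex>

  // Minimal model of the chunked overflow stack introduced by the patch.
  // Chunks are handed out from a preallocated backing array via an atomic
  // high-water-mark bump; drained chunks are recycled through a free list.
  // Two locks (one per list) replace the former single global lock.
  class ChunkedMarkStack {
  public:
    static const size_t EntriesPerChunk = 1024 - 1; // one slot pays for 'next'

  private:
    struct Chunk {
      Chunk* next;
      void* data[EntriesPerChunk];
    };

    Chunk* _base;                 // backing array of chunks
    size_t _capacity;             // number of chunks in _base
    std::atomic<size_t> _hwm;     // chunks handed out from _base so far

    Chunk* _free_list;            // recycled chunks; guarded by _free_list_lock
    Chunk* _chunk_list;           // chunks holding data; guarded by _chunk_list_lock
    std::mutex _free_list_lock;   // stands in for MarkStackFreeList_lock
    std::mutex _chunk_list_lock;  // stands in for MarkStackChunkList_lock

    Chunk* allocate_new_chunk() {
      // Dirty read first: the atomic bump is only attempted below capacity,
      // which bounds _hwm by _capacity + #threads and prevents wraparound.
      if (_hwm.load(std::memory_order_relaxed) >= _capacity) return nullptr;
      size_t idx = _hwm.fetch_add(1);
      if (idx >= _capacity) return nullptr;
      Chunk* c = &_base[idx];
      c->next = nullptr;
      return c;
    }

  public:
    explicit ChunkedMarkStack(size_t capacity)
      : _base(new Chunk[capacity]), _capacity(capacity), _hwm(0),
        _free_list(nullptr), _chunk_list(nullptr) {}
    ~ChunkedMarkStack() { delete[] _base; }

    // Mirrors par_push_chunk(): buf holds EntriesPerChunk slots, with a
    // nullptr terminator if it is not completely filled. Returns false when
    // both the free list and the backing memory are exhausted, i.e. the
    // caller must treat the stack as out of memory.
    bool par_push_chunk(void* const* buf) {
      Chunk* c = nullptr;
      {
        std::lock_guard<std::mutex> g(_free_list_lock);
        c = _free_list;
        if (c != nullptr) _free_list = c->next;
      }
      if (c == nullptr) c = allocate_new_chunk();
      if (c == nullptr) return false;
      for (size_t i = 0; i < EntriesPerChunk; i++) c->data[i] = buf[i]; // copy outside any lock
      std::lock_guard<std::mutex> g(_chunk_list_lock);
      c->next = _chunk_list;
      _chunk_list = c;
      return true;
    }

    // Mirrors par_pop_chunk(): copies one whole chunk out, then recycles it.
    bool par_pop_chunk(void** buf) {
      Chunk* c = nullptr;
      {
        std::lock_guard<std::mutex> g(_chunk_list_lock);
        c = _chunk_list;
        if (c == nullptr) return false;
        _chunk_list = c->next;
      }
      for (size_t i = 0; i < EntriesPerChunk; i++) buf[i] = c->data[i]; // copy outside any lock
      std::lock_guard<std::mutex> g(_free_list_lock);
      c->next = _free_list;
      _free_list = c;
      return true;
    }
  };

Compare with the removed par_push_arr()/par_pop_arr(): there, every transfer copied up to 1024 oops while holding ParGCRareEvent_lock; here, a push or pop holds a lock only for two pointer updates on one list.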
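
Task-side usage, shaped like the new move_entries_to_global_stack(): a full buffer of OopsPerChunk slots is always handed over, with a NULL terminator marking a short chunk; consumers of par_pop_chunk() stop at the first NULL. In this sketch, local_queue_pop and global_stack are hypothetical stand-ins tied to the ChunkedMarkStack model above:

  // Hypothetical task-side transfer, shaped like move_entries_to_global_stack().
  bool local_queue_pop(void** out);       // assumed: pops one entry from the local queue
  extern ChunkedMarkStack global_stack;   // assumed: the shared stack instance

  void move_entries_to_global_stack() {
    void* buffer[ChunkedMarkStack::EntriesPerChunk];
    size_t n = 0;
    while (n < ChunkedMarkStack::EntriesPerChunk && local_queue_pop(&buffer[n])) {
      ++n;
    }
    if (n < ChunkedMarkStack::EntriesPerChunk) {
      buffer[n] = nullptr;                // terminator for a partially filled chunk
    }
    if (n > 0 && !global_stack.par_push_chunk(buffer)) {
      // the real code sets has_aborted/has_overflown here
    }
  }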
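
The sizing arithmetic in capacity_alignment()/initialize() converts capacities given in oops (MarkStackSize, MarkStackSizeMax) into whole chunks whose total size is a multiple of the OS allocation granularity. A small worked sketch, with an assumed 4 KiB granularity and an example stack size:

  #include <cstddef>
  #include <cstdio>

  // Sketch of the initialize()/capacity_alignment() arithmetic. The
  // granularity and the example MarkStackSize are assumptions for
  // illustration; capacities arrive in oops and are rounded up to whole,
  // granularity-aligned chunks before resize() is called.
  static size_t gcd(size_t a, size_t b) { return b == 0 ? a : gcd(b, a % b); }
  static size_t lcm(size_t a, size_t b) { return a / gcd(a, b) * b; }
  static size_t align_up(size_t v, size_t a) { return (v + a - 1) / a * a; }

  int main() {
    size_t granularity = 4096;                  // stand-in for os::vm_allocation_granularity()
    size_t chunk_bytes = 1024 * sizeof(void*);  // sizeof(OopChunk): next pointer + 1023 oops
    size_t chunk_in_oops = chunk_bytes / sizeof(void*);

    // capacity_alignment(): alignment of the stack size, expressed in oops.
    size_t alignment = lcm(granularity, chunk_bytes) / sizeof(void*);

    size_t mark_stack_size = 16384;             // example MarkStackSize, in oops
    size_t chunks = align_up(mark_stack_size, alignment) / chunk_in_oops;
    printf("%zu oops -> %zu chunks\n", mark_stack_size, chunks); // 16384 -> 16
    return 0;
  }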
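
The _pad0.._pad4 members keep the fields used under different locks on different cache lines, so tasks pushing to the chunk list do not false-share with tasks hitting the free list or the high water mark. A compact model of that layout, assuming a 64-byte DEFAULT_CACHE_LINE_SIZE:

  #include <cstddef>

  // Sketch of the padding layout: each hot field starts a fresh 64-byte
  // cache line, mirroring _pad0.._pad4 in G1CMMarkStack.
  struct PaddedLists {
    char pad0[64];
    void* volatile free_list;             // guarded by the free-list lock
    char pad1[64 - sizeof(void*)];
    void* volatile chunk_list;            // guarded by the chunk-list lock
    volatile size_t chunks_in_chunk_list; // updated under the same lock
    char pad2[64 - sizeof(void*) - sizeof(size_t)];
    volatile size_t hwm;                  // bumped with an atomic add, no lock
    char pad3[64 - sizeof(size_t)];
  };
  static_assert(sizeof(PaddedLists) == 4 * 64, "four cache lines");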
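
Finally, the new MmapArrayAllocator::allocate_or_null() differs from allocate() only in returning NULL on reservation or commit failure instead of exiting the VM, which lets resize() degrade to a log warning and keep the old stack. A rough POSIX/Linux analogue of that reserve-then-commit contract (not the HotSpot implementation, which goes through os::reserve_memory()/os::commit_memory() and NMT):

  #include <cstddef>
  #include <sys/mman.h>

  // Rough analogue of allocate_or_null(): reserve address space, then
  // commit it, returning NULL instead of aborting when either step fails.
  template <class E>
  E* allocate_or_null(size_t length) {
    size_t size = length * sizeof(E);
    // Reserve address space without backing it.
    void* addr = mmap(NULL, size, PROT_NONE,
                      MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
    if (addr == MAP_FAILED) {
      return NULL;
    }
    // Commit: make the range readable and writable.
    if (mprotect(addr, size, PROT_READ | PROT_WRITE) != 0) {
      munmap(addr, size);  // release the reservation on commit failure
      return NULL;
    }
    return static_cast<E*>(addr);
  }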