# HG changeset patch # User tschatzl # Date 1489574686 -3600 # Node ID 6061df52d610404989e5468de85d0e49cf6303de # Parent 91576389a517fbef384496808f78606551f95988 8168467: Use TaskEntry as task mark queue elements Summary: Change the mark stack to use TaskEntry queue elements to improve type safety instead of casting around raw pointers. Reviewed-by: kbarrett, sangheki diff -r 91576389a517 -r 6061df52d610 hotspot/src/share/vm/gc/g1/g1ConcurrentMark.cpp --- a/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.cpp Tue Mar 14 14:07:24 2017 -0400 +++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.cpp Wed Mar 15 11:44:46 2017 +0100 @@ -145,15 +145,15 @@ assert(new_capacity <= _max_chunk_capacity, "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity); - OopChunk* new_base = MmapArrayAllocator::allocate_or_null(new_capacity); + TaskQueueEntryChunk* new_base = MmapArrayAllocator::allocate_or_null(new_capacity); if (new_base == NULL) { - log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(OopChunk)); + log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk)); return false; } // Release old mapping. 
if (_base != NULL) { - MmapArrayAllocator::free(_base, _chunk_capacity); + MmapArrayAllocator::free(_base, _chunk_capacity); } _base = new_base; @@ -165,16 +165,16 @@ } size_t G1CMMarkStack::capacity_alignment() { - return (size_t)lcm(os::vm_allocation_granularity(), sizeof(OopChunk)) / sizeof(void*); + return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry); } bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) { guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized."); - size_t const OopChunkSizeInVoidStar = sizeof(OopChunk) / sizeof(void*); - - _max_chunk_capacity = (size_t)align_size_up(max_capacity, capacity_alignment()) / OopChunkSizeInVoidStar; - size_t initial_chunk_capacity = (size_t)align_size_up(initial_capacity, capacity_alignment()) / OopChunkSizeInVoidStar; + size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry); + + _max_chunk_capacity = (size_t)align_size_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar; + size_t initial_chunk_capacity = (size_t)align_size_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar; guarantee(initial_chunk_capacity <= _max_chunk_capacity, "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT, @@ -210,49 +210,49 @@ G1CMMarkStack::~G1CMMarkStack() { if (_base != NULL) { - MmapArrayAllocator::free(_base, _chunk_capacity); + MmapArrayAllocator::free(_base, _chunk_capacity); } } -void G1CMMarkStack::add_chunk_to_list(OopChunk* volatile* list, OopChunk* elem) { +void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) { elem->next = *list; *list = elem; } -void G1CMMarkStack::add_chunk_to_chunk_list(OopChunk* elem) { +void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) { MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag); 
add_chunk_to_list(&_chunk_list, elem); _chunks_in_chunk_list++; } -void G1CMMarkStack::add_chunk_to_free_list(OopChunk* elem) { +void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) { MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag); add_chunk_to_list(&_free_list, elem); } -G1CMMarkStack::OopChunk* G1CMMarkStack::remove_chunk_from_list(OopChunk* volatile* list) { - OopChunk* result = *list; +G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(TaskQueueEntryChunk* volatile* list) { + TaskQueueEntryChunk* result = *list; if (result != NULL) { *list = (*list)->next; } return result; } -G1CMMarkStack::OopChunk* G1CMMarkStack::remove_chunk_from_chunk_list() { +G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() { MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag); - OopChunk* result = remove_chunk_from_list(&_chunk_list); + TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list); if (result != NULL) { _chunks_in_chunk_list--; } return result; } -G1CMMarkStack::OopChunk* G1CMMarkStack::remove_chunk_from_free_list() { +G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() { MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag); return remove_chunk_from_list(&_free_list); } -G1CMMarkStack::OopChunk* G1CMMarkStack::allocate_new_chunk() { +G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::allocate_new_chunk() { // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code. // Further this limits _hwm to a value of _chunk_capacity + #threads, avoiding // wraparound of _hwm. 
@@ -265,14 +265,14 @@ return NULL; } - OopChunk* result = ::new (&_base[cur_idx]) OopChunk; + TaskQueueEntryChunk* result = ::new (&_base[cur_idx]) TaskQueueEntryChunk; result->next = NULL; return result; } -bool G1CMMarkStack::par_push_chunk(oop* ptr_arr) { +bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) { // Get a new chunk. - OopChunk* new_chunk = remove_chunk_from_free_list(); + TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list(); if (new_chunk == NULL) { // Did not get a chunk from the free list. Allocate from backing memory. @@ -283,21 +283,21 @@ } } - Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, OopsPerChunk * sizeof(oop)); + Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry)); add_chunk_to_chunk_list(new_chunk); return true; } -bool G1CMMarkStack::par_pop_chunk(oop* ptr_arr) { - OopChunk* cur = remove_chunk_from_chunk_list(); +bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) { + TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list(); if (cur == NULL) { return false; } - Copy::conjoint_memory_atomic(cur->data, ptr_arr, OopsPerChunk * sizeof(oop)); + Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry)); add_chunk_to_free_list(cur); return true; @@ -1995,13 +1995,17 @@ _info(info) { } - void operator()(oop obj) const { - guarantee(G1CMObjArrayProcessor::is_array_slice(obj) || obj->is_oop(), + void operator()(G1TaskQueueEntry task_entry) const { + if (task_entry.is_array_slice()) { + guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice())); + return; + } + guarantee(task_entry.obj()->is_oop(), "Non-oop " PTR_FORMAT ", phase: %s, info: %d", - p2i(obj), _phase, _info); - guarantee(G1CMObjArrayProcessor::is_array_slice(obj) || !_g1h->is_in_cset(obj), + p2i(task_entry.obj()), _phase, _info); + guarantee(!_g1h->is_in_cset(task_entry.obj()), "obj: " PTR_FORMAT " in CSet, 
phase: %s, info: %d", - p2i(obj), _phase, _info); + p2i(task_entry.obj()), _phase, _info); } }; @@ -2195,7 +2199,7 @@ // We move that task's local finger along. _task->move_finger_to(addr); - _task->scan_object(oop(addr)); + _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr))); // we only partially drain the local queue and global stack _task->drain_local_queue(true); _task->drain_global_stack(true); @@ -2386,16 +2390,16 @@ void G1CMTask::move_entries_to_global_stack() { // Local array where we'll store the entries that will be popped // from the local queue. - oop buffer[G1CMMarkStack::OopsPerChunk]; + G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk]; size_t n = 0; - oop obj; - while (n < G1CMMarkStack::OopsPerChunk && _task_queue->pop_local(obj)) { - buffer[n] = obj; + G1TaskQueueEntry task_entry; + while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) { + buffer[n] = task_entry; ++n; } - if (n < G1CMMarkStack::OopsPerChunk) { - buffer[n] = NULL; + if (n < G1CMMarkStack::EntriesPerChunk) { + buffer[n] = G1TaskQueueEntry(); } if (n > 0) { @@ -2411,20 +2415,20 @@ bool G1CMTask::get_entries_from_global_stack() { // Local array where we'll store the entries that will be popped // from the global stack. - oop buffer[G1CMMarkStack::OopsPerChunk]; + G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk]; if (!_cm->mark_stack_pop(buffer)) { return false; } // We did actually pop at least one entry. 
- for (size_t i = 0; i < G1CMMarkStack::OopsPerChunk; ++i) { - oop elem = buffer[i]; - if (elem == NULL) { + for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) { + G1TaskQueueEntry task_entry = buffer[i]; + if (task_entry.is_null()) { break; } - assert(G1CMObjArrayProcessor::is_array_slice(elem) || elem->is_oop(), "Element " PTR_FORMAT " must be an array slice or oop", p2i(elem)); - bool success = _task_queue->push(elem); + assert(task_entry.is_array_slice() || task_entry.obj()->is_oop(), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj())); + bool success = _task_queue->push(task_entry); // We only call this when the local queue is empty or under a // given target limit. So, we do not expect this push to fail. assert(success, "invariant"); @@ -2451,14 +2455,14 @@ } if (_task_queue->size() > target_size) { - oop obj; - bool ret = _task_queue->pop_local(obj); + G1TaskQueueEntry entry; + bool ret = _task_queue->pop_local(entry); while (ret) { - scan_object(obj); + scan_task_entry(entry); if (_task_queue->size() <= target_size || has_aborted()) { ret = false; } else { - ret = _task_queue->pop_local(obj); + ret = _task_queue->pop_local(entry); } } } @@ -2539,8 +2543,8 @@ _step_times_ms.maximum(), _step_times_ms.sum()); } -bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) { - return _task_queues->steal(worker_id, hash_seed, obj); +bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, G1TaskQueueEntry& task_entry) { + return _task_queues->steal(worker_id, hash_seed, task_entry); } /***************************************************************************** @@ -2863,9 +2867,9 @@ assert(_cm->out_of_regions() && _task_queue->size() == 0, "only way to reach here"); while (!has_aborted()) { - oop obj; - if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) { - scan_object(obj); + G1TaskQueueEntry entry; + if (_cm->try_stealing(_worker_id, &_hash_seed, entry)) { + scan_task_entry(entry); // And since 
we're towards the end, let's totally drain the // local queue and global stack. diff -r 91576389a517 -r 6061df52d610 hotspot/src/share/vm/gc/g1/g1ConcurrentMark.hpp --- a/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.hpp Tue Mar 14 14:07:24 2017 -0400 +++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.hpp Wed Mar 15 11:44:46 2017 +0100 @@ -38,7 +38,62 @@ class ConcurrentGCTimer; class G1OldTracer; class G1SurvivorRegions; -typedef GenericTaskQueue G1CMTaskQueue; + +#ifdef _MSC_VER +#pragma warning(push) +// warning C4522: multiple assignment operators specified +#pragma warning(disable:4522) +#endif + +// This is a container class for either an oop or a continuation address for +// mark stack entries. Both are pushed onto the mark stack. +class G1TaskQueueEntry VALUE_OBJ_CLASS_SPEC { +private: + void* _holder; + + static const uintptr_t ArraySliceBit = 1; + + G1TaskQueueEntry(oop obj) : _holder(obj) { + assert(_holder != NULL, "Not allowed to set NULL task queue element"); + } + G1TaskQueueEntry(HeapWord* addr) : _holder((void*)((uintptr_t)addr | ArraySliceBit)) { } +public: + G1TaskQueueEntry(const G1TaskQueueEntry& other) { _holder = other._holder; } + G1TaskQueueEntry() : _holder(NULL) { } + + static G1TaskQueueEntry from_slice(HeapWord* what) { return G1TaskQueueEntry(what); } + static G1TaskQueueEntry from_oop(oop obj) { return G1TaskQueueEntry(obj); } + + G1TaskQueueEntry& operator=(const G1TaskQueueEntry& t) { + _holder = t._holder; + return *this; + } + + volatile G1TaskQueueEntry& operator=(const volatile G1TaskQueueEntry& t) volatile { + _holder = t._holder; + return *this; + } + + oop obj() const { + assert(!is_array_slice(), "Trying to read array slice " PTR_FORMAT " as oop", p2i(_holder)); + return (oop)_holder; + } + + HeapWord* slice() const { + assert(is_array_slice(), "Trying to read oop " PTR_FORMAT " as array slice", p2i(_holder)); + return (HeapWord*)((uintptr_t)_holder & ~ArraySliceBit); + } + + bool is_oop() const { return !is_array_slice(); } + 
bool is_array_slice() const { return ((uintptr_t)_holder & ArraySliceBit) != 0; } + bool is_null() const { return _holder == NULL; } +}; + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +typedef GenericTaskQueue G1CMTaskQueue; typedef GenericTaskQueueSet G1CMTaskQueueSet; // Closure used by CM during concurrent reference discovery @@ -165,44 +220,44 @@ // list connecting all empty chunks. class G1CMMarkStack VALUE_OBJ_CLASS_SPEC { public: - // Number of oops that can fit in a single chunk. - static const size_t OopsPerChunk = 1024 - 1 /* One reference for the next pointer */; + // Number of TaskQueueEntries that can fit in a single chunk. + static const size_t EntriesPerChunk = 1024 - 1 /* One reference for the next pointer */; private: - struct OopChunk { - OopChunk* next; - oop data[OopsPerChunk]; + struct TaskQueueEntryChunk { + TaskQueueEntryChunk* next; + G1TaskQueueEntry data[EntriesPerChunk]; }; - size_t _max_chunk_capacity; // Maximum number of OopChunk elements on the stack. + size_t _max_chunk_capacity; // Maximum number of TaskQueueEntryChunk elements on the stack. - OopChunk* _base; // Bottom address of allocated memory area. - size_t _chunk_capacity; // Current maximum number of OopChunk elements. + TaskQueueEntryChunk* _base; // Bottom address of allocated memory area. + size_t _chunk_capacity; // Current maximum number of TaskQueueEntryChunk elements. char _pad0[DEFAULT_CACHE_LINE_SIZE]; - OopChunk* volatile _free_list; // Linked list of free chunks that can be allocated by users. - char _pad1[DEFAULT_CACHE_LINE_SIZE - sizeof(OopChunk*)]; - OopChunk* volatile _chunk_list; // List of chunks currently containing data. + TaskQueueEntryChunk* volatile _free_list; // Linked list of free chunks that can be allocated by users. + char _pad1[DEFAULT_CACHE_LINE_SIZE - sizeof(TaskQueueEntryChunk*)]; + TaskQueueEntryChunk* volatile _chunk_list; // List of chunks currently containing data. 
volatile size_t _chunks_in_chunk_list; - char _pad2[DEFAULT_CACHE_LINE_SIZE - sizeof(OopChunk*) - sizeof(size_t)]; + char _pad2[DEFAULT_CACHE_LINE_SIZE - sizeof(TaskQueueEntryChunk*) - sizeof(size_t)]; volatile size_t _hwm; // High water mark within the reserved space. char _pad4[DEFAULT_CACHE_LINE_SIZE - sizeof(size_t)]; // Allocate a new chunk from the reserved memory, using the high water mark. Returns // NULL if out of memory. - OopChunk* allocate_new_chunk(); + TaskQueueEntryChunk* allocate_new_chunk(); // Atomically add the given chunk to the list. - void add_chunk_to_list(OopChunk* volatile* list, OopChunk* elem); + void add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem); // Atomically remove and return a chunk from the given list. Returns NULL if the // list is empty. - OopChunk* remove_chunk_from_list(OopChunk* volatile* list); + TaskQueueEntryChunk* remove_chunk_from_list(TaskQueueEntryChunk* volatile* list); - void add_chunk_to_chunk_list(OopChunk* elem); - void add_chunk_to_free_list(OopChunk* elem); + void add_chunk_to_chunk_list(TaskQueueEntryChunk* elem); + void add_chunk_to_free_list(TaskQueueEntryChunk* elem); - OopChunk* remove_chunk_from_chunk_list(); - OopChunk* remove_chunk_from_free_list(); + TaskQueueEntryChunk* remove_chunk_from_chunk_list(); + TaskQueueEntryChunk* remove_chunk_from_free_list(); bool _should_expand; @@ -220,17 +275,17 @@ // Allocate and initialize the mark stack with the given number of oops. bool initialize(size_t initial_capacity, size_t max_capacity); - // Pushes the given buffer containing at most OopsPerChunk elements on the mark - // stack. If less than OopsPerChunk elements are to be pushed, the array must + // Pushes the given buffer containing at most EntriesPerChunk elements on the mark + // stack. If less than EntriesPerChunk elements are to be pushed, the array must // be terminated with a NULL. 
// Returns whether the buffer contents were successfully pushed to the global mark // stack. - bool par_push_chunk(oop* buffer); + bool par_push_chunk(G1TaskQueueEntry* buffer); // Pops a chunk from this mark stack, copying them into the given buffer. This - // chunk may contain up to OopsPerChunk elements. If there are less, the last + // chunk may contain up to EntriesPerChunk elements. If there are less, the last // element in the array is a NULL pointer. - bool par_pop_chunk(oop* buffer); + bool par_pop_chunk(G1TaskQueueEntry* buffer); // Return whether the chunk list is empty. Racy due to unsynchronized access to // _chunk_list. @@ -246,7 +301,7 @@ // Return the approximate number of oops on this mark stack. Racy due to // unsynchronized access to _chunks_in_chunk_list. - size_t size() const { return _chunks_in_chunk_list * OopsPerChunk; } + size_t size() const { return _chunks_in_chunk_list * EntriesPerChunk; } void set_empty(); @@ -526,14 +581,14 @@ // Manipulation of the global mark stack. // The push and pop operations are used by tasks for transfers // between task-local queues and the global mark stack. - bool mark_stack_push(oop* arr) { + bool mark_stack_push(G1TaskQueueEntry* arr) { if (!_global_mark_stack.par_push_chunk(arr)) { set_has_overflown(); return false; } return true; } - bool mark_stack_pop(oop* arr) { + bool mark_stack_pop(G1TaskQueueEntry* arr) { return _global_mark_stack.par_pop_chunk(arr); } size_t mark_stack_size() { return _global_mark_stack.size(); } @@ -567,7 +622,7 @@ } // Attempts to steal an object from the task queues of other tasks - bool try_stealing(uint worker_id, int* hash_seed, oop& obj); + bool try_stealing(uint worker_id, int* hash_seed, G1TaskQueueEntry& task_entry); G1ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, @@ -822,7 +877,7 @@ // mark bitmap scan, and so needs to be pushed onto the mark stack. 
bool is_below_finger(oop obj, HeapWord* global_finger) const; - template void process_grey_object(oop obj); + template void process_grey_task_entry(G1TaskQueueEntry task_entry); public: // Apply the closure on the given area of the objArray. Return the number of words // scanned. @@ -887,10 +942,10 @@ inline void deal_with_reference(oop obj); // It scans an object and visits its children. - inline void scan_object(oop obj); + inline void scan_task_entry(G1TaskQueueEntry task_entry); // It pushes an object on the local queue. - inline void push(oop obj); + inline void push(G1TaskQueueEntry task_entry); // Move entries to the global stack. void move_entries_to_global_stack(); diff -r 91576389a517 -r 6061df52d610 hotspot/src/share/vm/gc/g1/g1ConcurrentMark.inline.hpp --- a/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.inline.hpp Tue Mar 14 14:07:24 2017 -0400 +++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.inline.hpp Wed Mar 15 11:44:46 2017 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -97,12 +97,12 @@ size_t num_chunks = 0; - OopChunk* cur = _chunk_list; + TaskQueueEntryChunk* cur = _chunk_list; while (cur != NULL) { guarantee(num_chunks <= _chunks_in_chunk_list, "Found " SIZE_FORMAT " oop chunks which is more than there should be", num_chunks); - for (size_t i = 0; i < OopsPerChunk; ++i) { - if (cur->data[i] == NULL) { + for (size_t i = 0; i < EntriesPerChunk; ++i) { + if (cur->data[i].is_null()) { break; } fn(cur->data[i]); @@ -114,17 +114,16 @@ #endif // It scans an object and visits its children. 
-inline void G1CMTask::scan_object(oop obj) { process_grey_object<true>(obj); } +inline void G1CMTask::scan_task_entry(G1TaskQueueEntry task_entry) { process_grey_task_entry<true>(task_entry); } -inline void G1CMTask::push(oop obj) { - HeapWord* objAddr = (HeapWord*) obj; - assert(G1CMObjArrayProcessor::is_array_slice(obj) || _g1h->is_in_g1_reserved(objAddr), "invariant"); - assert(G1CMObjArrayProcessor::is_array_slice(obj) || !_g1h->is_on_master_free_list( - _g1h->heap_region_containing((HeapWord*) objAddr)), "invariant"); - assert(G1CMObjArrayProcessor::is_array_slice(obj) || !_g1h->is_obj_ill(obj), "invariant"); - assert(G1CMObjArrayProcessor::is_array_slice(obj) || _nextMarkBitMap->isMarked(objAddr), "invariant"); +inline void G1CMTask::push(G1TaskQueueEntry task_entry) { + assert(task_entry.is_array_slice() || _g1h->is_in_g1_reserved(task_entry.obj()), "invariant"); + assert(task_entry.is_array_slice() || !_g1h->is_on_master_free_list( + _g1h->heap_region_containing(task_entry.obj())), "invariant"); + assert(task_entry.is_array_slice() || !_g1h->is_obj_ill(task_entry.obj()), "invariant"); // FIXME!!! + assert(task_entry.is_array_slice() || _nextMarkBitMap->isMarked((HeapWord*)task_entry.obj()), "invariant"); - if (!_task_queue->push(obj)) { + if (!_task_queue->push(task_entry)) { // The local task queue looks full. We need to push some entries // to the global stack. move_entries_to_global_stack(); @@ -132,7 +131,7 @@ // this should succeed since, even if we overflow the global // stack, we should have definitely removed some entries from the // local queue. So, there must be space on it. 
- bool success = _task_queue->push(obj); + bool success = _task_queue->push(task_entry); assert(success, "invariant"); } } @@ -168,18 +167,21 @@ } template <bool scan> -inline void G1CMTask::process_grey_object(oop obj) { - assert(scan || obj->is_typeArray(), "Skipping scan of grey non-typeArray"); - assert(G1CMObjArrayProcessor::is_array_slice(obj) || _nextMarkBitMap->isMarked((HeapWord*) obj), +inline void G1CMTask::process_grey_task_entry(G1TaskQueueEntry task_entry) { + assert(scan || (task_entry.is_oop() && task_entry.obj()->is_typeArray()), "Skipping scan of grey non-typeArray"); + assert(task_entry.is_array_slice() || _nextMarkBitMap->isMarked((HeapWord*)task_entry.obj()), "Any stolen object should be a slice or marked"); if (scan) { - if (G1CMObjArrayProcessor::is_array_slice(obj)) { - _words_scanned += _objArray_processor.process_slice(obj); - } else if (G1CMObjArrayProcessor::should_be_sliced(obj)) { - _words_scanned += _objArray_processor.process_obj(obj); + if (task_entry.is_array_slice()) { + _words_scanned += _objArray_processor.process_slice(task_entry.slice()); } else { - _words_scanned += obj->oop_iterate_size(_cm_oop_closure);; + oop obj = task_entry.obj(); + if (G1CMObjArrayProcessor::should_be_sliced(obj)) { + _words_scanned += _objArray_processor.process_obj(obj); + } else { + _words_scanned += obj->oop_iterate_size(_cm_oop_closure);; + } } } check_limits(); @@ -210,6 +212,7 @@ // be pushed on the stack. So, some duplicate work, but no // correctness problems. if (is_below_finger(obj, global_finger)) { + G1TaskQueueEntry entry = G1TaskQueueEntry::from_oop(obj); if (obj->is_typeArray()) { // Immediately process arrays of primitive types, rather // than pushing on the mark stack. This keeps us from @@ -221,9 +224,9 @@ // by only doing a bookkeeping update and avoiding the // actual scan of the object - a typeArray contains no // references, and the metadata is built-in. 
- process_grey_object<false>(obj); + process_grey_task_entry<false>(entry); } else { - push(obj); + push(entry); } } } diff -r 91576389a517 -r 6061df52d610 hotspot/src/share/vm/gc/g1/g1ConcurrentMarkObjArrayProcessor.cpp --- a/hotspot/src/share/vm/gc/g1/g1ConcurrentMarkObjArrayProcessor.cpp Tue Mar 14 14:07:24 2017 -0400 +++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMarkObjArrayProcessor.cpp Wed Mar 15 11:44:46 2017 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -26,18 +26,8 @@ #include "gc/g1/g1ConcurrentMark.inline.hpp" #include "gc/g1/g1ConcurrentMarkObjArrayProcessor.inline.hpp" -oop G1CMObjArrayProcessor::encode_array_slice(HeapWord* addr) { - return oop((void*)((uintptr_t)addr | ArraySliceBit)); -} - -HeapWord* G1CMObjArrayProcessor::decode_array_slice(oop value) { - assert(is_array_slice(value), "Given value " PTR_FORMAT " is not an array slice", p2i(value)); - return (HeapWord*)((uintptr_t)(void*)value & ~ArraySliceBit); -} - void G1CMObjArrayProcessor::push_array_slice(HeapWord* what) { - oop obj = encode_array_slice(what); - _task->push(obj); + _task->push(G1TaskQueueEntry::from_slice(what)); } size_t G1CMObjArrayProcessor::process_array_slice(objArrayOop obj, HeapWord* start_from, size_t remaining) { @@ -58,30 +48,29 @@ return process_array_slice(objArrayOop(obj), (HeapWord*)obj, (size_t)objArrayOop(obj)->size()); } -size_t G1CMObjArrayProcessor::process_slice(oop obj) { - HeapWord* const decoded_address = decode_array_slice(obj); +size_t G1CMObjArrayProcessor::process_slice(HeapWord* slice) { // Find the start address of the objArrayOop. // Shortcut the BOT access if the given address is from a humongous object. 
The BOT // slide is fast enough for "smaller" objects in non-humongous regions, but is slower // than directly using heap region table. G1CollectedHeap* g1h = G1CollectedHeap::heap(); - HeapRegion* r = g1h->heap_region_containing(decoded_address); + HeapRegion* r = g1h->heap_region_containing(slice); HeapWord* const start_address = r->is_humongous() ? r->humongous_start_region()->bottom() : - g1h->block_start(decoded_address); + g1h->block_start(slice); assert(oop(start_address)->is_objArray(), "Address " PTR_FORMAT " does not refer to an object array ", p2i(start_address)); - assert(start_address < decoded_address, + assert(start_address < slice, "Object start address " PTR_FORMAT " must be smaller than decoded address " PTR_FORMAT, p2i(start_address), - p2i(decoded_address)); + p2i(slice)); objArrayOop objArray = objArrayOop(start_address); - size_t already_scanned = decoded_address - start_address; + size_t already_scanned = slice - start_address; size_t remaining = objArray->size() - already_scanned; - return process_array_slice(objArray, decoded_address, remaining); + return process_array_slice(objArray, slice, remaining); } diff -r 91576389a517 -r 6061df52d610 hotspot/src/share/vm/gc/g1/g1ConcurrentMarkObjArrayProcessor.hpp --- a/hotspot/src/share/vm/gc/g1/g1ConcurrentMarkObjArrayProcessor.hpp Tue Mar 14 14:07:24 2017 -0400 +++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMarkObjArrayProcessor.hpp Wed Mar 15 11:44:46 2017 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -36,32 +36,22 @@ // This allows incremental processing of large objects. class G1CMObjArrayProcessor VALUE_OBJ_CLASS_SPEC { private: - // The bit mask for the continuation indicator of elements on the mark stack. 
- static const size_t ArraySliceBit = 1; - // Reference to the task for doing the actual work. G1CMTask* _task; - // Encodes the given address as a continuation "oop". - oop encode_array_slice(HeapWord* addr); - // Remove the continuation marker from the given oop from the mark stack. - HeapWord* decode_array_slice(oop value); - // Push the continuation at the given address onto the mark stack. void push_array_slice(HeapWord* addr); // Process (apply the closure) on the given continuation of the given objArray. size_t process_array_slice(objArrayOop const obj, HeapWord* start_from, size_t remaining); public: - static bool is_array_slice(void* obj) { return ((uintptr_t)obj & ArraySliceBit) != 0; } - static bool should_be_sliced(oop obj); G1CMObjArrayProcessor(G1CMTask* task) : _task(task) { } - // Process the given continuation "oop". Returns the number of words scanned. - size_t process_slice(oop obj); + // Process the given continuation. Returns the number of words scanned. + size_t process_slice(HeapWord* slice); // Start processing the given objArrayOop by scanning the header and pushing its // continuation. size_t process_obj(oop obj);