# HG changeset patch # User tschatzl # Date 1479983277 -3600 # Node ID a9611bab7578e277f33844f53ff86ba0196b98f3 # Parent 900edeec97766e9312eb91c723fefcdc91dc5539 8057003: Large reference arrays cause extremely long synchronization times Summary: Slice large object arrays into parts so that the synchronization of marking threads with an STW pause request does not take long. Reviewed-by: ehelin, pliden diff -r 900edeec9776 -r a9611bab7578 hotspot/src/share/vm/gc/g1/g1ConcurrentMark.cpp --- a/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.cpp Thu Nov 24 09:33:54 2016 +0000 +++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.cpp Thu Nov 24 11:27:57 2016 +0100 @@ -2009,10 +2009,10 @@ { } void operator()(oop obj) const { - guarantee(obj->is_oop(), + guarantee(G1CMObjArrayProcessor::is_array_slice(obj) || obj->is_oop(), "Non-oop " PTR_FORMAT ", phase: %s, info: %d", p2i(obj), _phase, _info); - guarantee(!_g1h->obj_in_cs(obj), + guarantee(G1CMObjArrayProcessor::is_array_slice(obj) || !_g1h->obj_in_cs(obj), "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d", p2i(obj), _phase, _info); } @@ -2436,6 +2436,7 @@ if (elem == NULL) { break; } + assert(G1CMObjArrayProcessor::is_array_slice(elem) || elem->is_oop(), "Element " PTR_FORMAT " must be an array slice or oop", p2i(elem)); bool success = _task_queue->push(elem); // We only call this when the local queue is empty or under a // given target limit. So, we do not expect this push to fail. 
@@ -2448,7 +2449,9 @@ } void G1CMTask::drain_local_queue(bool partially) { - if (has_aborted()) return; + if (has_aborted()) { + return; + } // Decide what the target size is, depending whether we're going to // drain it partially (so that other tasks can steal if they run out @@ -2464,12 +2467,7 @@ oop obj; bool ret = _task_queue->pop_local(obj); while (ret) { - assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" ); - assert(!_g1h->is_on_master_free_list( - _g1h->heap_region_containing((HeapWord*) obj)), "invariant"); - scan_object(obj); - if (_task_queue->size() <= target_size || has_aborted()) { ret = false; } else { @@ -2880,8 +2878,6 @@ while (!has_aborted()) { oop obj; if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) { - assert(_nextMarkBitMap->isMarked((HeapWord*) obj), - "any stolen object should be marked"); scan_object(obj); // And since we're towards the end, let's totally drain the @@ -3003,6 +2999,7 @@ G1CMTaskQueueSet* task_queues) : _g1h(G1CollectedHeap::heap()), _worker_id(worker_id), _cm(cm), + _objArray_processor(this), _claimed(false), _nextMarkBitMap(NULL), _hash_seed(17), _task_queue(task_queue), diff -r 900edeec9776 -r a9611bab7578 hotspot/src/share/vm/gc/g1/g1ConcurrentMark.hpp --- a/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.hpp Thu Nov 24 09:33:54 2016 +0000 +++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.hpp Thu Nov 24 11:27:57 2016 +0100 @@ -26,6 +26,7 @@ #define SHARE_VM_GC_G1_G1CONCURRENTMARK_HPP #include "classfile/javaClasses.hpp" +#include "gc/g1/g1ConcurrentMarkObjArrayProcessor.hpp" #include "gc/g1/g1RegionToSpaceMapper.hpp" #include "gc/g1/heapRegionSet.hpp" #include "gc/shared/taskqueue.hpp" @@ -706,11 +707,13 @@ words_scanned_period = 12*1024, // The regular clock call is called once the number of visited // references reaches this limit - refs_reached_period = 384, + refs_reached_period = 1024, // Initial value for the hash seed, used in the work stealing code init_hash_seed = 17 }; + G1CMObjArrayProcessor 
_objArray_processor; + uint _worker_id; G1CollectedHeap* _g1h; G1ConcurrentMark* _cm; @@ -826,8 +829,10 @@ bool is_below_finger(oop obj, HeapWord* global_finger) const; template<bool scan> void process_grey_object(oop obj); - public: + // Apply the closure on the given area of the objArray. Return the number of words + // scanned. + inline size_t scan_objArray(objArrayOop obj, MemRegion mr); // It resets the task; it should be called right at the beginning of // a marking phase. void reset(G1CMBitMap* _nextMarkBitMap); diff -r 900edeec9776 -r a9611bab7578 hotspot/src/share/vm/gc/g1/g1ConcurrentMark.inline.hpp --- a/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.inline.hpp Thu Nov 24 09:33:54 2016 +0000 +++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.inline.hpp Thu Nov 24 11:27:57 2016 +0100 @@ -27,6 +27,7 @@ #include "gc/g1/g1CollectedHeap.inline.hpp" #include "gc/g1/g1ConcurrentMark.hpp" +#include "gc/g1/g1ConcurrentMarkObjArrayProcessor.inline.hpp" #include "gc/g1/suspendibleThreadSet.hpp" #include "gc/shared/taskqueue.inline.hpp" @@ -117,11 +118,11 @@ inline void G1CMTask::push(oop obj) { HeapWord* objAddr = (HeapWord*) obj; - assert(_g1h->is_in_g1_reserved(objAddr), "invariant"); - assert(!_g1h->is_on_master_free_list( + assert(G1CMObjArrayProcessor::is_array_slice(obj) || _g1h->is_in_g1_reserved(objAddr), "invariant"); + assert(G1CMObjArrayProcessor::is_array_slice(obj) || !_g1h->is_on_master_free_list( _g1h->heap_region_containing((HeapWord*) objAddr)), "invariant"); - assert(!_g1h->is_obj_ill(obj), "invariant"); - assert(_nextMarkBitMap->isMarked(objAddr), "invariant"); + assert(G1CMObjArrayProcessor::is_array_slice(obj) || !_g1h->is_obj_ill(obj), "invariant"); + assert(G1CMObjArrayProcessor::is_array_slice(obj) || _nextMarkBitMap->isMarked(objAddr), "invariant"); if (!_task_queue->push(obj)) { // The local task queue looks full. 
We need to push some entries @@ -169,17 +170,26 @@ template<bool scan> inline void G1CMTask::process_grey_object(oop obj) { assert(scan || obj->is_typeArray(), "Skipping scan of grey non-typeArray"); - assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); - - size_t obj_size = obj->size(); - _words_scanned += obj_size; + assert(G1CMObjArrayProcessor::is_array_slice(obj) || _nextMarkBitMap->isMarked((HeapWord*) obj), + "Any stolen object should be a slice or marked"); if (scan) { - obj->oop_iterate(_cm_oop_closure); + if (G1CMObjArrayProcessor::is_array_slice(obj)) { + _words_scanned += _objArray_processor.process_slice(obj); + } else if (G1CMObjArrayProcessor::should_be_sliced(obj)) { + _words_scanned += _objArray_processor.process_obj(obj); + } else { + _words_scanned += obj->oop_iterate_size(_cm_oop_closure);; + } } check_limits(); } +inline size_t G1CMTask::scan_objArray(objArrayOop obj, MemRegion mr) { + obj->oop_iterate(_cm_oop_closure, mr); + return mr.word_size(); +} + inline void G1CMTask::make_reference_grey(oop obj) { if (_cm->par_mark(obj)) { // No OrderAccess:store_load() is needed. It is implicit in the diff -r 900edeec9776 -r a9611bab7578 hotspot/src/share/vm/gc/g1/g1ConcurrentMarkObjArrayProcessor.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMarkObjArrayProcessor.cpp Thu Nov 24 11:27:57 2016 +0100 @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "gc/g1/g1ConcurrentMark.inline.hpp" +#include "gc/g1/g1ConcurrentMarkObjArrayProcessor.inline.hpp" + +oop G1CMObjArrayProcessor::encode_array_slice(HeapWord* addr) { + return oop((void*)((uintptr_t)addr | ArraySliceBit)); +} + +HeapWord* G1CMObjArrayProcessor::decode_array_slice(oop value) { + assert(is_array_slice(value), "Given value " PTR_FORMAT " is not an array slice", p2i(value)); + return (HeapWord*)((uintptr_t)(void*)value & ~ArraySliceBit); +} + +void G1CMObjArrayProcessor::push_array_slice(HeapWord* what) { + oop obj = encode_array_slice(what); + _task->push(obj); +} + +size_t G1CMObjArrayProcessor::process_array_slice(objArrayOop obj, HeapWord* start_from, size_t remaining) { + size_t words_to_scan = MIN2(remaining, ObjArrayMarkingStride); + + if (remaining > ObjArrayMarkingStride) { + push_array_slice(start_from + ObjArrayMarkingStride); + } + + // Then process current area. 
+ MemRegion mr(start_from, words_to_scan); + return _task->scan_objArray(obj, mr); +} + +size_t G1CMObjArrayProcessor::process_obj(oop obj) { + assert(should_be_sliced(obj), "Must be an array object %d and large " SIZE_FORMAT, obj->is_objArray(), (size_t)obj->size()); + + return process_array_slice(objArrayOop(obj), (HeapWord*)obj, (size_t)objArrayOop(obj)->size()); +} + +size_t G1CMObjArrayProcessor::process_slice(oop obj) { + HeapWord* const decoded_address = decode_array_slice(obj); + + // Find the start address of the objArrayOop. + // Shortcut the BOT access if the given address is from a humongous object. The BOT + // slide is fast enough for "smaller" objects in non-humongous regions, but is slower + // than directly using heap region table. + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + HeapRegion* r = g1h->heap_region_containing(decoded_address); + + HeapWord* const start_address = r->is_humongous() ? + r->humongous_start_region()->bottom() : + g1h->block_start(decoded_address); + + assert(oop(start_address)->is_objArray(), "Address " PTR_FORMAT " does not refer to an object array ", p2i(start_address)); + assert(start_address < decoded_address, + "Object start address " PTR_FORMAT " must be smaller than decoded address " PTR_FORMAT, + p2i(start_address), + p2i(decoded_address)); + + objArrayOop objArray = objArrayOop(start_address); + + size_t already_scanned = decoded_address - start_address; + size_t remaining = objArray->size() - already_scanned; + + return process_array_slice(objArray, decoded_address, remaining); +} diff -r 900edeec9776 -r a9611bab7578 hotspot/src/share/vm/gc/g1/g1ConcurrentMarkObjArrayProcessor.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMarkObjArrayProcessor.hpp Thu Nov 24 11:27:57 2016 +0100 @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_GC_G1_G1CONCURRENTMARKOBJARRAYPROCESSOR_HPP +#define SHARE_VM_GC_G1_G1CONCURRENTMARKOBJARRAYPROCESSOR_HPP + +#include "oops/oopsHierarchy.hpp" +#include "memory/allocation.hpp" + +class G1CMTask; + +// Helper class to mark through large objArrays during marking in an efficient way. +// Instead of pushing large object arrays, we push continuations onto the +// mark stack. These continuations are identified by having their LSB set. +// This allows incremental processing of large objects. +class G1CMObjArrayProcessor VALUE_OBJ_CLASS_SPEC { +private: + // The bit mask for the continuation indicator of elements on the mark stack. + static const size_t ArraySliceBit = 1; + + // Reference to the task for doing the actual work. + G1CMTask* _task; + + // Encodes the given address as a continuation "oop". + oop encode_array_slice(HeapWord* addr); + // Remove the continuation marker from the given oop from the mark stack. + HeapWord* decode_array_slice(oop value); + + // Push the continuation at the given address onto the mark stack. 
+ void push_array_slice(HeapWord* addr); + + // Process (apply the closure) on the given continuation of the given objArray. + size_t process_array_slice(objArrayOop const obj, HeapWord* start_from, size_t remaining); +public: + static bool is_array_slice(void* obj) { return ((uintptr_t)obj & ArraySliceBit) != 0; } + + static bool should_be_sliced(oop obj); + + G1CMObjArrayProcessor(G1CMTask* task) : _task(task) { + } + + // Process the given continuation "oop". Returns the number of words scanned. + size_t process_slice(oop obj); + // Start processing the given objArrayOop by scanning the header and pushing its + // continuation. + size_t process_obj(oop obj); +}; + +#endif /* SHARE_VM_GC_G1_G1CONCURRENTMARKOBJARRAYPROCESSOR_HPP */ diff -r 900edeec9776 -r a9611bab7578 hotspot/src/share/vm/gc/g1/g1ConcurrentMarkObjArrayProcessor.inline.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMarkObjArrayProcessor.inline.hpp Thu Nov 24 11:27:57 2016 +0100 @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_GC_G1_G1CONCURRENTMARKOBJARRAYPROCESSOR_INLINE_HPP +#define SHARE_VM_GC_G1_G1CONCURRENTMARKOBJARRAYPROCESSOR_INLINE_HPP + +#include "oops/oop.inline.hpp" +#include "oops/oopsHierarchy.hpp" +#include "runtime/globals.hpp" + +inline bool G1CMObjArrayProcessor::should_be_sliced(oop obj) { + return obj->is_objArray() && ((size_t)((objArrayOop)obj)->size()) >= 2 * ObjArrayMarkingStride; +} + +#endif /* SHARE_VM_GC_G1_G1CONCURRENTMARKOBJARRAYPROCESSOR_INLINE_HPP */ diff -r 900edeec9776 -r a9611bab7578 hotspot/src/share/vm/runtime/globals.hpp --- a/hotspot/src/share/vm/runtime/globals.hpp Thu Nov 24 09:33:54 2016 +0000 +++ b/hotspot/src/share/vm/runtime/globals.hpp Thu Nov 24 11:27:57 2016 +0100 @@ -1988,7 +1988,7 @@ experimental(uintx, WorkStealingSpinToYieldRatio, 10, \ "Ratio of hard spins to calls to yield") \ \ - develop(uintx, ObjArrayMarkingStride, 512, \ + develop(uintx, ObjArrayMarkingStride, 2048, \ "Number of object array elements to push onto the marking stack " \ "before pushing a continuation entry") \ \