8057003: Large reference arrays cause extremely long synchronization times
Summary: Slice large object arrays into parts so that the synchronization of marking threads with an STW pause request does not take long.
Reviewed-by: ehelin, pliden
--- a/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.cpp Thu Nov 24 09:33:54 2016 +0000
+++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.cpp Thu Nov 24 11:27:57 2016 +0100
@@ -2009,10 +2009,10 @@
{ }
void operator()(oop obj) const {
- guarantee(obj->is_oop(),
+ guarantee(G1CMObjArrayProcessor::is_array_slice(obj) || obj->is_oop(),
"Non-oop " PTR_FORMAT ", phase: %s, info: %d",
p2i(obj), _phase, _info);
- guarantee(!_g1h->obj_in_cs(obj),
+ guarantee(G1CMObjArrayProcessor::is_array_slice(obj) || !_g1h->obj_in_cs(obj),
"obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
p2i(obj), _phase, _info);
}
@@ -2436,6 +2436,7 @@
if (elem == NULL) {
break;
}
+ assert(G1CMObjArrayProcessor::is_array_slice(elem) || elem->is_oop(), "Element " PTR_FORMAT " must be an array slice or oop", p2i(elem));
bool success = _task_queue->push(elem);
// We only call this when the local queue is empty or under a
// given target limit. So, we do not expect this push to fail.
@@ -2448,7 +2449,9 @@
}
void G1CMTask::drain_local_queue(bool partially) {
- if (has_aborted()) return;
+ if (has_aborted()) {
+ return;
+ }
// Decide what the target size is, depending whether we're going to
// drain it partially (so that other tasks can steal if they run out
@@ -2464,12 +2467,7 @@
oop obj;
bool ret = _task_queue->pop_local(obj);
while (ret) {
- assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
- assert(!_g1h->is_on_master_free_list(
- _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
-
scan_object(obj);
-
if (_task_queue->size() <= target_size || has_aborted()) {
ret = false;
} else {
@@ -2880,8 +2878,6 @@
while (!has_aborted()) {
oop obj;
if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
- assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
- "any stolen object should be marked");
scan_object(obj);
// And since we're towards the end, let's totally drain the
@@ -3003,6 +2999,7 @@
G1CMTaskQueueSet* task_queues)
: _g1h(G1CollectedHeap::heap()),
_worker_id(worker_id), _cm(cm),
+ _objArray_processor(this),
_claimed(false),
_nextMarkBitMap(NULL), _hash_seed(17),
_task_queue(task_queue),
--- a/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.hpp Thu Nov 24 09:33:54 2016 +0000
+++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.hpp Thu Nov 24 11:27:57 2016 +0100
@@ -26,6 +26,7 @@
#define SHARE_VM_GC_G1_G1CONCURRENTMARK_HPP
#include "classfile/javaClasses.hpp"
+#include "gc/g1/g1ConcurrentMarkObjArrayProcessor.hpp"
#include "gc/g1/g1RegionToSpaceMapper.hpp"
#include "gc/g1/heapRegionSet.hpp"
#include "gc/shared/taskqueue.hpp"
@@ -706,11 +707,13 @@
words_scanned_period = 12*1024,
// The regular clock call is called once the number of visited
// references reaches this limit
- refs_reached_period = 384,
+ refs_reached_period = 1024,
// Initial value for the hash seed, used in the work stealing code
init_hash_seed = 17
};
+ G1CMObjArrayProcessor _objArray_processor;
+
uint _worker_id;
G1CollectedHeap* _g1h;
G1ConcurrentMark* _cm;
@@ -826,8 +829,10 @@
bool is_below_finger(oop obj, HeapWord* global_finger) const;
template<bool scan> void process_grey_object(oop obj);
-
public:
+ // Apply the closure on the given area of the objArray. Return the number of words
+ // scanned.
+ inline size_t scan_objArray(objArrayOop obj, MemRegion mr);
// It resets the task; it should be called right at the beginning of
// a marking phase.
void reset(G1CMBitMap* _nextMarkBitMap);
--- a/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.inline.hpp Thu Nov 24 09:33:54 2016 +0000
+++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.inline.hpp Thu Nov 24 11:27:57 2016 +0100
@@ -27,6 +27,7 @@
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1ConcurrentMark.hpp"
+#include "gc/g1/g1ConcurrentMarkObjArrayProcessor.inline.hpp"
#include "gc/g1/suspendibleThreadSet.hpp"
#include "gc/shared/taskqueue.inline.hpp"
@@ -117,11 +118,11 @@
inline void G1CMTask::push(oop obj) {
HeapWord* objAddr = (HeapWord*) obj;
- assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
- assert(!_g1h->is_on_master_free_list(
+ assert(G1CMObjArrayProcessor::is_array_slice(obj) || _g1h->is_in_g1_reserved(objAddr), "invariant");
+ assert(G1CMObjArrayProcessor::is_array_slice(obj) || !_g1h->is_on_master_free_list(
_g1h->heap_region_containing((HeapWord*) objAddr)), "invariant");
- assert(!_g1h->is_obj_ill(obj), "invariant");
- assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
+ assert(G1CMObjArrayProcessor::is_array_slice(obj) || !_g1h->is_obj_ill(obj), "invariant");
+ assert(G1CMObjArrayProcessor::is_array_slice(obj) || _nextMarkBitMap->isMarked(objAddr), "invariant");
if (!_task_queue->push(obj)) {
// The local task queue looks full. We need to push some entries
@@ -169,17 +170,26 @@
template<bool scan>
inline void G1CMTask::process_grey_object(oop obj) {
assert(scan || obj->is_typeArray(), "Skipping scan of grey non-typeArray");
- assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
-
- size_t obj_size = obj->size();
- _words_scanned += obj_size;
+ assert(G1CMObjArrayProcessor::is_array_slice(obj) || _nextMarkBitMap->isMarked((HeapWord*) obj),
+ "Any stolen object should be a slice or marked");
if (scan) {
- obj->oop_iterate(_cm_oop_closure);
+ if (G1CMObjArrayProcessor::is_array_slice(obj)) {
+ _words_scanned += _objArray_processor.process_slice(obj); // Continuation of an objArray that is scanned in slices.
+ } else if (G1CMObjArrayProcessor::should_be_sliced(obj)) {
+ _words_scanned += _objArray_processor.process_obj(obj); // Large objArray: scan its first slice, queue the rest.
+ } else {
+ _words_scanned += obj->oop_iterate_size(_cm_oop_closure); // Everything else is scanned in one go.
+ }
}
check_limits();
}
+inline size_t G1CMTask::scan_objArray(objArrayOop obj, MemRegion mr) { // Scans the part of obj given by mr.
+ obj->oop_iterate(_cm_oop_closure, mr); // Applies _cm_oop_closure on the given area of the objArray.
+ return mr.word_size(); // The size of mr is reported as the number of words scanned.
+}
+
inline void G1CMTask::make_reference_grey(oop obj) {
if (_cm->par_mark(obj)) {
// No OrderAccess:store_load() is needed. It is implicit in the
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMarkObjArrayProcessor.cpp Thu Nov 24 11:27:57 2016 +0100
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc/g1/g1ConcurrentMark.inline.hpp"
+#include "gc/g1/g1ConcurrentMarkObjArrayProcessor.inline.hpp"
+
+oop G1CMObjArrayProcessor::encode_array_slice(HeapWord* addr) {
+ return oop((void*)((uintptr_t)addr | ArraySliceBit)); // Setting ArraySliceBit tags the address as a continuation.
+}
+
+HeapWord* G1CMObjArrayProcessor::decode_array_slice(oop value) {
+ assert(is_array_slice(value), "Given value " PTR_FORMAT " is not an array slice", p2i(value)); // Only tagged values may be decoded.
+ return (HeapWord*)((uintptr_t)(void*)value & ~ArraySliceBit); // Clearing ArraySliceBit yields the untagged address.
+}
+
+void G1CMObjArrayProcessor::push_array_slice(HeapWord* what) {
+ oop obj = encode_array_slice(what); // Continuation "oop": what with ArraySliceBit set.
+ _task->push(obj); // Queued through G1CMTask::push().
+}
+
+size_t G1CMObjArrayProcessor::process_array_slice(objArrayOop obj, HeapWord* start_from, size_t remaining) {
+ size_t words_to_scan = MIN2(remaining, ObjArrayMarkingStride); // One slice is at most ObjArrayMarkingStride words.
+
+ if (remaining > ObjArrayMarkingStride) { // More left than fits into this slice:
+ push_array_slice(start_from + ObjArrayMarkingStride); // queue a continuation starting right behind it.
+ }
+
+ // Then scan the current slice; the return value is the number of words scanned.
+ MemRegion mr(start_from, words_to_scan);
+ return _task->scan_objArray(obj, mr);
+}
+
+size_t G1CMObjArrayProcessor::process_obj(oop obj) {
+ assert(should_be_sliced(obj), "Must be an array object %d and large " SIZE_FORMAT, obj->is_objArray(), (size_t)obj->size());
+ // First slice: start at the object's own address with its whole size() still remaining.
+ return process_array_slice(objArrayOop(obj), (HeapWord*)obj, (size_t)objArrayOop(obj)->size());
+}
+
+size_t G1CMObjArrayProcessor::process_slice(oop obj) {
+ HeapWord* const decoded_address = decode_array_slice(obj); // The continuation's address with ArraySliceBit cleared.
+
+ // The continuation only carries an address inside the array, so first find the start
+ // address of the objArrayOop containing it. Shortcut the BOT access if that address is
+ // from a humongous object: the BOT slide is fast enough for "smaller" objects in
+ // non-humongous regions, but is slower than directly using the heap region table.
+ G1CollectedHeap* g1h = G1CollectedHeap::heap();
+ HeapRegion* r = g1h->heap_region_containing(decoded_address);
+
+ HeapWord* const start_address = r->is_humongous() ?
+ r->humongous_start_region()->bottom() :
+ g1h->block_start(decoded_address);
+
+ assert(oop(start_address)->is_objArray(), "Address " PTR_FORMAT " does not refer to an object array ", p2i(start_address));
+ assert(start_address < decoded_address,
+ "Object start address " PTR_FORMAT " must be smaller than decoded address " PTR_FORMAT,
+ p2i(start_address),
+ p2i(decoded_address));
+
+ objArrayOop objArray = objArrayOop(start_address);
+
+ size_t already_scanned = decoded_address - start_address; // HeapWord* difference, i.e. a number of words.
+ size_t remaining = objArray->size() - already_scanned; // What is left of the array from decoded_address on.
+
+ return process_array_slice(objArray, decoded_address, remaining); // Scans one more slice and queues the next, if any.
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMarkObjArrayProcessor.hpp Thu Nov 24 11:27:57 2016 +0100
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_G1_G1CONCURRENTMARKOBJARRAYPROCESSOR_HPP
+#define SHARE_VM_GC_G1_G1CONCURRENTMARKOBJARRAYPROCESSOR_HPP
+
+#include "oops/oopsHierarchy.hpp"
+#include "memory/allocation.hpp"
+
+class G1CMTask;
+
+// Helper class that lets a G1CMTask mark through large objArrays incrementally.
+// Instead of scanning a large object array in one go, it is scanned one slice at a time,
+// and a continuation for the not yet scanned part is pushed onto the mark stack. These
+// continuations are identified by having their LSB (ArraySliceBit) set, see is_array_slice().
+class G1CMObjArrayProcessor VALUE_OBJ_CLASS_SPEC {
+private:
+ // Bit mask of the tag bit that marks an element on the mark stack as a continuation.
+ static const size_t ArraySliceBit = 1;
+
+ // The task doing the actual work: continuations are pushed and slices are scanned through it.
+ G1CMTask* _task;
+
+ // Encodes the given address as a continuation "oop" by setting ArraySliceBit.
+ oop encode_array_slice(HeapWord* addr);
+ // Strips the continuation marker from the given mark stack element, yielding the plain address.
+ HeapWord* decode_array_slice(oop value);
+
+ // Pushes a continuation for the given address onto the mark stack.
+ void push_array_slice(HeapWord* addr);
+
+ // Applies the closure to the next slice of obj at start_from, first pushing a continuation if more than one slice remains. Returns the number of words scanned.
+ size_t process_array_slice(objArrayOop const obj, HeapWord* start_from, size_t remaining);
+public:
+ static bool is_array_slice(void* obj) { return ((uintptr_t)obj & ArraySliceBit) != 0; }
+
+ static bool should_be_sliced(oop obj); // True for objArrays whose size() is at least 2 * ObjArrayMarkingStride.
+
+ G1CMObjArrayProcessor(G1CMTask* task) : _task(task) {
+ }
+
+ // Process the given continuation "oop". Returns the number of words scanned.
+ size_t process_slice(oop obj);
+ // Start processing the given objArrayOop by scanning its first slice (beginning at the object's
+ // own address) and pushing a continuation for the rest. Returns the number of words scanned.
+ size_t process_obj(oop obj);
+};
+
+#endif /* SHARE_VM_GC_G1_G1CONCURRENTMARKOBJARRAYPROCESSOR_HPP */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMarkObjArrayProcessor.inline.hpp Thu Nov 24 11:27:57 2016 +0100
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_G1_G1CONCURRENTMARKOBJARRAYPROCESSOR_INLINE_HPP
+#define SHARE_VM_GC_G1_G1CONCURRENTMARKOBJARRAYPROCESSOR_INLINE_HPP
+
+#include "oops/oop.inline.hpp"
+#include "oops/oopsHierarchy.hpp"
+#include "runtime/globals.hpp"
+
+inline bool G1CMObjArrayProcessor::should_be_sliced(oop obj) {
+ return obj->is_objArray() && ((size_t)((objArrayOop)obj)->size()) >= 2 * ObjArrayMarkingStride; // True only for objArrays whose size() is at least two marking strides.
+}
+
+#endif /* SHARE_VM_GC_G1_G1CONCURRENTMARKOBJARRAYPROCESSOR_INLINE_HPP */
--- a/hotspot/src/share/vm/runtime/globals.hpp Thu Nov 24 09:33:54 2016 +0000
+++ b/hotspot/src/share/vm/runtime/globals.hpp Thu Nov 24 11:27:57 2016 +0100
@@ -1988,7 +1988,7 @@
experimental(uintx, WorkStealingSpinToYieldRatio, 10, \
"Ratio of hard spins to calls to yield") \
\
- develop(uintx, ObjArrayMarkingStride, 512, \
+ develop(uintx, ObjArrayMarkingStride, 2048, \
"Number of object array elements to push onto the marking stack " \
"before pushing a continuation entry") \
\