8057003: Large reference arrays cause extremely long synchronization times
author tschatzl
Thu, 24 Nov 2016 11:27:57 +0100
changeset 42597 a9611bab7578
parent 42596 900edeec9776
child 42598 45562c0473fb
child 42600 d97d0621bf54
8057003: Large reference arrays cause extremely long synchronization times
Summary: Slice large object arrays into parts so that the synchronization of marking threads with an STW pause request does not take long.
Reviewed-by: ehelin, pliden
hotspot/src/share/vm/gc/g1/g1ConcurrentMark.cpp
hotspot/src/share/vm/gc/g1/g1ConcurrentMark.hpp
hotspot/src/share/vm/gc/g1/g1ConcurrentMark.inline.hpp
hotspot/src/share/vm/gc/g1/g1ConcurrentMarkObjArrayProcessor.cpp
hotspot/src/share/vm/gc/g1/g1ConcurrentMarkObjArrayProcessor.hpp
hotspot/src/share/vm/gc/g1/g1ConcurrentMarkObjArrayProcessor.inline.hpp
hotspot/src/share/vm/runtime/globals.hpp
--- a/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.cpp	Thu Nov 24 09:33:54 2016 +0000
+++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.cpp	Thu Nov 24 11:27:57 2016 +0100
@@ -2009,10 +2009,10 @@
   { }
 
   void operator()(oop obj) const {
-    guarantee(obj->is_oop(),
+    guarantee(G1CMObjArrayProcessor::is_array_slice(obj) || obj->is_oop(), // Array slices are tagged addresses, not oops.
               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
               p2i(obj), _phase, _info);
-    guarantee(!_g1h->obj_in_cs(obj),
+    guarantee(G1CMObjArrayProcessor::is_array_slice(obj) || !_g1h->obj_in_cs(obj), // The CSet check is skipped for slices as well.
               "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
               p2i(obj), _phase, _info);
   }
@@ -2436,6 +2436,7 @@
     if (elem == NULL) {
       break;
     }
+    assert(G1CMObjArrayProcessor::is_array_slice(elem) || elem->is_oop(), "Element " PTR_FORMAT " must be an array slice or oop", p2i(elem));
     bool success = _task_queue->push(elem);
     // We only call this when the local queue is empty or under a
     // given target limit. So, we do not expect this push to fail.
@@ -2448,7 +2449,9 @@
 }
 
 void G1CMTask::drain_local_queue(bool partially) {
-  if (has_aborted()) return;
+  if (has_aborted()) {
+    return;
+  }
 
   // Decide what the target size is, depending whether we're going to
   // drain it partially (so that other tasks can steal if they run out
@@ -2464,12 +2467,7 @@
     oop obj;
     bool ret = _task_queue->pop_local(obj);
     while (ret) {
-      assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
-      assert(!_g1h->is_on_master_free_list(
-                  _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
-
       scan_object(obj);
-
       if (_task_queue->size() <= target_size || has_aborted()) {
         ret = false;
       } else {
@@ -2880,8 +2878,6 @@
     while (!has_aborted()) {
       oop obj;
       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
-        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
-               "any stolen object should be marked");
         scan_object(obj);
 
         // And since we're towards the end, let's totally drain the
@@ -3003,6 +2999,7 @@
                    G1CMTaskQueueSet* task_queues)
   : _g1h(G1CollectedHeap::heap()),
     _worker_id(worker_id), _cm(cm),
+    _objArray_processor(this),
     _claimed(false),
     _nextMarkBitMap(NULL), _hash_seed(17),
     _task_queue(task_queue),
--- a/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.hpp	Thu Nov 24 09:33:54 2016 +0000
+++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.hpp	Thu Nov 24 11:27:57 2016 +0100
@@ -26,6 +26,7 @@
 #define SHARE_VM_GC_G1_G1CONCURRENTMARK_HPP
 
 #include "classfile/javaClasses.hpp"
+#include "gc/g1/g1ConcurrentMarkObjArrayProcessor.hpp"
 #include "gc/g1/g1RegionToSpaceMapper.hpp"
 #include "gc/g1/heapRegionSet.hpp"
 #include "gc/shared/taskqueue.hpp"
@@ -706,11 +707,13 @@
     words_scanned_period          = 12*1024,
     // The regular clock call is called once the number of visited
     // references reaches this limit
-    refs_reached_period           = 384,
+    refs_reached_period           = 1024,
     // Initial value for the hash seed, used in the work stealing code
     init_hash_seed                = 17
   };
 
+  G1CMObjArrayProcessor       _objArray_processor;
+
   uint                        _worker_id;
   G1CollectedHeap*            _g1h;
   G1ConcurrentMark*           _cm;
@@ -826,8 +829,10 @@
   bool is_below_finger(oop obj, HeapWord* global_finger) const;
 
   template<bool scan> void process_grey_object(oop obj);
-
 public:
+  // Apply the closure on the given area of the objArray. Return the number of words
+  // scanned.
+  inline size_t scan_objArray(objArrayOop obj, MemRegion mr);
   // It resets the task; it should be called right at the beginning of
   // a marking phase.
   void reset(G1CMBitMap* _nextMarkBitMap);
--- a/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.inline.hpp	Thu Nov 24 09:33:54 2016 +0000
+++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.inline.hpp	Thu Nov 24 11:27:57 2016 +0100
@@ -27,6 +27,7 @@
 
 #include "gc/g1/g1CollectedHeap.inline.hpp"
 #include "gc/g1/g1ConcurrentMark.hpp"
+#include "gc/g1/g1ConcurrentMarkObjArrayProcessor.inline.hpp"
 #include "gc/g1/suspendibleThreadSet.hpp"
 #include "gc/shared/taskqueue.inline.hpp"
 
@@ -117,11 +118,11 @@
 
 inline void G1CMTask::push(oop obj) {
   HeapWord* objAddr = (HeapWord*) obj;
-  assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
-  assert(!_g1h->is_on_master_free_list(
+  assert(G1CMObjArrayProcessor::is_array_slice(obj) || _g1h->is_in_g1_reserved(objAddr), "invariant");
+  assert(G1CMObjArrayProcessor::is_array_slice(obj) || !_g1h->is_on_master_free_list(
               _g1h->heap_region_containing((HeapWord*) objAddr)), "invariant");
-  assert(!_g1h->is_obj_ill(obj), "invariant");
-  assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
+  assert(G1CMObjArrayProcessor::is_array_slice(obj) || !_g1h->is_obj_ill(obj), "invariant");
+  assert(G1CMObjArrayProcessor::is_array_slice(obj) || _nextMarkBitMap->isMarked(objAddr), "invariant");
 
   if (!_task_queue->push(obj)) {
     // The local task queue looks full. We need to push some entries
@@ -169,17 +170,26 @@
 template<bool scan>
 inline void G1CMTask::process_grey_object(oop obj) { // obj is either a marked object or a tagged array slice.
   assert(scan || obj->is_typeArray(), "Skipping scan of grey non-typeArray");
-  assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
-
-  size_t obj_size = obj->size();
-  _words_scanned += obj_size;
+  assert(G1CMObjArrayProcessor::is_array_slice(obj) || _nextMarkBitMap->isMarked((HeapWord*) obj),
+         "Any grey object should be a slice or marked");
 
   if (scan) {
-    obj->oop_iterate(_cm_oop_closure);
+    if (G1CMObjArrayProcessor::is_array_slice(obj)) { // Continuation entry: scan the next slice of a large objArray.
+      _words_scanned += _objArray_processor.process_slice(obj);
+    } else if (G1CMObjArrayProcessor::should_be_sliced(obj)) { // Large objArray: scan its first slice, queue the rest.
+      _words_scanned += _objArray_processor.process_obj(obj);
+    } else { // Everything else is iterated in one go.
+      _words_scanned += obj->oop_iterate_size(_cm_oop_closure);
+    }
   }
   check_limits();
 }
 
+inline size_t G1CMTask::scan_objArray(objArrayOop obj, MemRegion mr) {
+  obj->oop_iterate(_cm_oop_closure, mr); // Apply the task's closure on the given area of the objArray only.
+  return mr.word_size();                 // The number of words reported as scanned is the size of that area.
+}
+
 inline void G1CMTask::make_reference_grey(oop obj) {
   if (_cm->par_mark(obj)) {
     // No OrderAccess:store_load() is needed. It is implicit in the
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMarkObjArrayProcessor.cpp	Thu Nov 24 11:27:57 2016 +0100
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc/g1/g1ConcurrentMark.inline.hpp"
+#include "gc/g1/g1ConcurrentMarkObjArrayProcessor.inline.hpp"
+
+oop G1CMObjArrayProcessor::encode_array_slice(HeapWord* addr) {
+  return oop((void*)((uintptr_t)addr | ArraySliceBit)); // Tag the address so that is_array_slice() recognizes it.
+}
+
+HeapWord* G1CMObjArrayProcessor::decode_array_slice(oop value) {
+  assert(is_array_slice(value), "Given value " PTR_FORMAT " is not an array slice", p2i(value));
+  return (HeapWord*)((uintptr_t)(void*)value & ~ArraySliceBit); // Clear the tag bit to get back the encoded address.
+}
+
+void G1CMObjArrayProcessor::push_array_slice(HeapWord* what) {
+  oop obj = encode_array_slice(what); // Tagged continuation entry for the address "what".
+  _task->push(obj);                   // Hand the entry to the owning task via G1CMTask::push().
+}
+
+size_t G1CMObjArrayProcessor::process_array_slice(objArrayOop obj, HeapWord* start_from, size_t remaining) {
+  size_t words_to_scan = MIN2(remaining, ObjArrayMarkingStride); // Scan at most one stride in this call.
+
+  if (remaining > ObjArrayMarkingStride) { // More than one stride left: queue a continuation for the rest first.
+    push_array_slice(start_from + ObjArrayMarkingStride);
+  }
+
+  // Then process the current area and return the number of words scanned.
+  MemRegion mr(start_from, words_to_scan);
+  return _task->scan_objArray(obj, mr);
+}
+
+size_t G1CMObjArrayProcessor::process_obj(oop obj) {
+  assert(should_be_sliced(obj), "Must be an array object %d and large " SIZE_FORMAT, obj->is_objArray(), (size_t)obj->size());
+
+  return process_array_slice(objArrayOop(obj), (HeapWord*)obj, (size_t)objArrayOop(obj)->size()); // First slice starts at the object itself; all of size() remains.
+}
+
+size_t G1CMObjArrayProcessor::process_slice(oop obj) {
+  HeapWord* const decoded_address = decode_array_slice(obj); // Address inside the array where this slice starts.
+
+  // Find the start address of the objArrayOop that contains the decoded address.
+  // Shortcut the BOT access if the given address is from a humongous object. The BOT
+  // slide is fast enough for "smaller" objects in non-humongous regions, but is slower
+  // than directly asking the containing region for its humongous start region.
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  HeapRegion* r = g1h->heap_region_containing(decoded_address);
+
+  HeapWord* const start_address = r->is_humongous() ?
+                                  r->humongous_start_region()->bottom() :
+                                  g1h->block_start(decoded_address);
+
+  assert(oop(start_address)->is_objArray(), "Address " PTR_FORMAT " does not refer to an object array ", p2i(start_address));
+  assert(start_address < decoded_address, // Continuations are pushed at start_from + stride, so never at the object start.
+         "Object start address " PTR_FORMAT " must be smaller than decoded address " PTR_FORMAT,
+         p2i(start_address),
+         p2i(decoded_address));
+
+  objArrayOop objArray = objArrayOop(start_address);
+
+  size_t already_scanned = decoded_address - start_address; // HeapWord* difference, i.e. the offset of this slice in words.
+  size_t remaining = objArray->size() - already_scanned;    // What is left of the array from this slice onwards.
+
+  return process_array_slice(objArray, decoded_address, remaining);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMarkObjArrayProcessor.hpp	Thu Nov 24 11:27:57 2016 +0100
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_G1_G1CONCURRENTMARKOBJARRAYPROCESSOR_HPP
+#define SHARE_VM_GC_G1_G1CONCURRENTMARKOBJARRAYPROCESSOR_HPP
+
+#include "oops/oopsHierarchy.hpp"
+#include "memory/allocation.hpp"
+
+class G1CMTask;
+
+// Helper class to mark through large objArrays during marking in an efficient way.
+// Instead of scanning a large object array in one go, we scan it one slice at a
+// time and push a continuation for the remainder onto the mark stack. Continuations
+// are addresses into the array that are identified by having their LSB set.
+class G1CMObjArrayProcessor VALUE_OBJ_CLASS_SPEC {
+private:
+  // The bit mask for the continuation indicator of elements on the mark stack.
+  static const size_t ArraySliceBit = 1;
+
+  // The task this processor works for: continuations are pushed to it and it scans the slices.
+  G1CMTask* _task;
+
+  // Encodes the given address as a continuation "oop" by setting ArraySliceBit.
+  oop encode_array_slice(HeapWord* addr);
+  // Removes the continuation marker from the given mark stack entry and returns the address.
+  HeapWord* decode_array_slice(oop value);
+
+  // Push the continuation at the given address onto the mark stack.
+  void push_array_slice(HeapWord* addr);
+
+  // Process (apply the closure to) one slice of obj at start_from; pushes a continuation if more remains.
+  size_t process_array_slice(objArrayOop const obj, HeapWord* start_from, size_t remaining);
+public:
+  static bool is_array_slice(void* obj) { return ((uintptr_t)obj & ArraySliceBit) != 0; } // True if ArraySliceBit is set in the value.
+
+  static bool should_be_sliced(oop obj); // True for objArrays whose size() is at least 2 * ObjArrayMarkingStride.
+
+  G1CMObjArrayProcessor(G1CMTask* task) : _task(task) {
+  }
+
+  // Process the given continuation "oop". Returns the number of words scanned.
+  size_t process_slice(oop obj);
+  // Start processing the given objArrayOop by scanning its first slice (which begins at
+  // the object itself) and pushing its continuation. Returns the number of words scanned.
+  size_t process_obj(oop obj);
+};
+
+#endif /* SHARE_VM_GC_G1_G1CONCURRENTMARKOBJARRAYPROCESSOR_HPP */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMarkObjArrayProcessor.inline.hpp	Thu Nov 24 11:27:57 2016 +0100
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_G1_G1CONCURRENTMARKOBJARRAYPROCESSOR_INLINE_HPP
+#define SHARE_VM_GC_G1_G1CONCURRENTMARKOBJARRAYPROCESSOR_INLINE_HPP
+
+#include "oops/oop.inline.hpp"
+#include "oops/oopsHierarchy.hpp"
+#include "runtime/globals.hpp"
+
+inline bool G1CMObjArrayProcessor::should_be_sliced(oop obj) {
+  return obj->is_objArray() && ((size_t)((objArrayOop)obj)->size()) >= 2 * ObjArrayMarkingStride; // Only objArrays spanning at least two strides are sliced.
+}
+
+#endif /* SHARE_VM_GC_G1_G1CONCURRENTMARKOBJARRAYPROCESSOR_INLINE_HPP */
--- a/hotspot/src/share/vm/runtime/globals.hpp	Thu Nov 24 09:33:54 2016 +0000
+++ b/hotspot/src/share/vm/runtime/globals.hpp	Thu Nov 24 11:27:57 2016 +0100
@@ -1988,7 +1988,7 @@
   experimental(uintx, WorkStealingSpinToYieldRatio, 10,                     \
           "Ratio of hard spins to calls to yield")                          \
                                                                             \
-  develop(uintx, ObjArrayMarkingStride, 512,                                \
+  develop(uintx, ObjArrayMarkingStride, 2048,                               \
           "Number of object array elements to push onto the marking stack " \
           "before pushing a continuation entry")                            \
                                                                             \