8221751: Shenandoah: Improve SATB enqueueing
authorrkennke
Tue, 02 Apr 2019 18:13:42 +0200
changeset 54383 cdc3bb0983a6
parent 54382 61616f509ef8
child 54384 cd3b7ad53265
8221751: Shenandoah: Improve SATB enqueueing Reviewed-by: shade
src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp
src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp
src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.cpp
src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.hpp
src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.inline.hpp
src/hotspot/share/gc/shenandoah/shenandoahRuntime.cpp
--- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp	Tue Apr 02 08:19:36 2019 -0700
+++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp	Tue Apr 02 18:13:42 2019 +0200
@@ -46,7 +46,18 @@
                                                        Register addr, Register count, RegSet saved_regs) {
   if (is_oop) {
     bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
-    if (!dest_uninitialized && !ShenandoahHeap::heap()->heuristics()->can_do_traversal_gc()) {
+    if (ShenandoahSATBBarrier && !dest_uninitialized && !ShenandoahHeap::heap()->heuristics()->can_do_traversal_gc()) {
+
+      Label done;
+
+      // Avoid calling runtime if count == 0
+      __ cbz(count, done);
+
+      // Is marking active?
+      Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
+      __ ldrb(rscratch1, gc_state);
+      __ tbz(rscratch1, ShenandoahHeap::MARKING_BITPOS, done);
+
       __ push(saved_regs, sp);
       if (count == c_rarg0) {
         if (addr == c_rarg1) {
@@ -68,6 +79,7 @@
         __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_oop_entry), 2);
       }
       __ pop(saved_regs, sp);
+      __ bind(done);
     }
   }
 }
--- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp	Tue Apr 02 08:19:36 2019 -0700
+++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp	Tue Apr 02 18:13:42 2019 +0200
@@ -66,26 +66,22 @@
     }
 #endif
 
-    if (!dest_uninitialized && !ShenandoahHeap::heap()->heuristics()->can_do_traversal_gc()) {
+    if (ShenandoahSATBBarrier && !dest_uninitialized && !ShenandoahHeap::heap()->heuristics()->can_do_traversal_gc()) {
       Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
 #ifndef _LP64
       __ push(thread);
       __ get_thread(thread);
 #endif
 
-      Label filtered;
-      Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset()));
-      // Is marking active?
-      if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
-        __ cmpl(in_progress, 0);
-      } else {
-        assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
-        __ cmpb(in_progress, 0);
-      }
+      Label done;
+      // Short-circuit if count == 0.
+      __ testptr(count, count);
+      __ jcc(Assembler::zero, done);
 
-      NOT_LP64(__ pop(thread);)
-
-        __ jcc(Assembler::equal, filtered);
+      // Avoid runtime call when not marking.
+      Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
+      __ testb(gc_state, ShenandoahHeap::MARKING);
+      __ jcc(Assembler::zero, done);
 
       __ pusha();                      // push registers
 #ifdef _LP64
@@ -111,7 +107,8 @@
                       dst, count);
 #endif
       __ popa();
-      __ bind(filtered);
+      __ bind(done);
+      NOT_LP64(__ pop(thread);)
     }
   }
 
--- a/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.cpp	Tue Apr 02 08:19:36 2019 -0700
+++ b/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.cpp	Tue Apr 02 18:13:42 2019 +0200
@@ -132,12 +132,23 @@
 template <class T>
 void ShenandoahBarrierSet::write_ref_array_pre_work(T* dst, size_t count) {
   shenandoah_assert_not_in_cset_loc_except(dst, _heap->cancelled_gc());
-  if (ShenandoahSATBBarrier && _heap->is_concurrent_mark_in_progress()) {
-    T* elem_ptr = dst;
-    for (size_t i = 0; i < count; i++, elem_ptr++) {
-      T heap_oop = RawAccess<>::oop_load(elem_ptr);
-      if (!CompressedOops::is_null(heap_oop)) {
-        enqueue(CompressedOops::decode_not_null(heap_oop));
+  assert(ShenandoahThreadLocalData::satb_mark_queue(Thread::current()).is_active(), "Shouldn't be here otherwise");
+  assert(ShenandoahSATBBarrier, "Shouldn't be here otherwise");
+  assert(count > 0, "Should have been filtered before");
+
+  Thread* thread = Thread::current();
+  ShenandoahMarkingContext* ctx = _heap->marking_context();
+  bool has_forwarded = _heap->has_forwarded_objects();
+  T* elem_ptr = dst;
+  for (size_t i = 0; i < count; i++, elem_ptr++) {
+    T heap_oop = RawAccess<>::oop_load(elem_ptr);
+    if (!CompressedOops::is_null(heap_oop)) {
+      oop obj = CompressedOops::decode_not_null(heap_oop);
+      if (has_forwarded) {
+        obj = resolve_forwarded_not_null(obj);
+      }
+      if (!ctx->is_marked(obj)) {
+        ShenandoahThreadLocalData::satb_mark_queue(thread).enqueue_known_active(obj);
       }
     }
   }
@@ -309,11 +320,9 @@
 }
 
 oop ShenandoahBarrierSet::storeval_barrier(oop obj) {
-  if (ShenandoahStoreValEnqueueBarrier) {
-    if (!CompressedOops::is_null(obj)) {
-      obj = write_barrier(obj);
-      enqueue(obj);
-    }
+  if (ShenandoahStoreValEnqueueBarrier && !CompressedOops::is_null(obj) && _heap->is_concurrent_traversal_in_progress()) {
+    obj = write_barrier(obj);
+    enqueue(obj);
   }
   if (ShenandoahStoreValReadBarrier) {
     obj = resolve_forwarded(obj);
@@ -329,14 +338,14 @@
 
 void ShenandoahBarrierSet::enqueue(oop obj) {
   shenandoah_assert_not_forwarded_if(NULL, obj, _heap->is_concurrent_traversal_in_progress());
-  if (!_satb_mark_queue_set.is_active()) return;
+  assert(_satb_mark_queue_set.is_active(), "only get here when SATB active");
 
   // Filter marked objects before hitting the SATB queues. The same predicate would
   // be used by SATBMQ::filter to eliminate already marked objects downstream, but
   // filtering here helps to avoid wasteful SATB queueing work to begin with.
   if (!_heap->requires_marking<false>(obj)) return;
 
-  ShenandoahThreadLocalData::satb_mark_queue(Thread::current()).enqueue(obj);
+  ShenandoahThreadLocalData::satb_mark_queue(Thread::current()).enqueue_known_active(obj);
 }
 
 void ShenandoahBarrierSet::on_thread_create(Thread* thread) {
--- a/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.hpp	Tue Apr 02 08:19:36 2019 -0700
+++ b/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.hpp	Tue Apr 02 18:13:42 2019 +0200
@@ -141,7 +141,7 @@
   bool arraycopy_loop(T* src, T* dst, size_t length, Klass* bound, bool disjoint);
 
   template <typename T, bool CHECKCAST, bool SATB, ShenandoahBarrierSet::ArrayCopyStoreValMode STOREVAL_MODE>
-  bool arraycopy_element(T* cur_src, T* cur_dst, Klass* bound, Thread* thread);
+  bool arraycopy_element(T* cur_src, T* cur_dst, Klass* bound, Thread* const thread, ShenandoahMarkingContext* const ctx);
 
 public:
   // Callbacks for runtime accesses.
--- a/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.inline.hpp	Tue Apr 02 08:19:36 2019 -0700
+++ b/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.inline.hpp	Tue Apr 02 18:13:42 2019 +0200
@@ -28,6 +28,9 @@
 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
 #include "gc/shenandoah/shenandoahBrooksPointer.inline.hpp"
 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
+#include "gc/shenandoah/shenandoahHeapRegion.hpp"
+#include "gc/shenandoah/shenandoahMarkingContext.inline.hpp"
+#include "gc/shenandoah/shenandoahThreadLocalData.hpp"
 
 bool ShenandoahBarrierSet::need_update_refs_barrier() {
   return _heap->is_update_refs_in_progress() ||
@@ -59,7 +62,8 @@
   } while ((! oopDesc::equals_raw(compare_value, expected)) && oopDesc::equals_raw(resolve_forwarded(compare_value), resolve_forwarded(expected)));
   if (oopDesc::equals_raw(expected, compare_value)) {
     const bool keep_alive = (decorators & AS_NO_KEEPALIVE) == 0;
-    if (keep_alive && ShenandoahSATBBarrier && !CompressedOops::is_null(compare_value)) {
+    if (keep_alive && ShenandoahSATBBarrier && !CompressedOops::is_null(compare_value) &&
+        ShenandoahHeap::heap()->is_concurrent_mark_in_progress()) {
       ShenandoahBarrierSet::barrier_set()->enqueue(compare_value);
     }
   }
@@ -72,7 +76,8 @@
   oop previous = Raw::oop_atomic_xchg(new_value, addr);
   if (ShenandoahSATBBarrier) {
     const bool keep_alive = (decorators & AS_NO_KEEPALIVE) == 0;
-    if (keep_alive && !CompressedOops::is_null(previous)) {
+    if (keep_alive && !CompressedOops::is_null(previous) &&
+        ShenandoahHeap::heap()->is_concurrent_mark_in_progress()) {
       ShenandoahBarrierSet::barrier_set()->enqueue(previous);
     }
   }
@@ -134,7 +139,7 @@
 template <typename T, bool CHECKCAST, bool SATB, ShenandoahBarrierSet::ArrayCopyStoreValMode STOREVAL_MODE>
 bool ShenandoahBarrierSet::arraycopy_loop(T* src, T* dst, size_t length, Klass* bound, bool disjoint) {
   Thread* thread = Thread::current();
-
+  ShenandoahMarkingContext* ctx = _heap->marking_context();
   ShenandoahEvacOOMScope oom_evac_scope;
 
   // We need to handle four cases:
@@ -161,7 +166,7 @@
     T* cur_dst = dst;
     T* src_end = src + length;
     for (; cur_src < src_end; cur_src++, cur_dst++) {
-      if (!arraycopy_element<T, CHECKCAST, SATB, STOREVAL_MODE>(cur_src, cur_dst, bound, thread)) {
+      if (!arraycopy_element<T, CHECKCAST, SATB, STOREVAL_MODE>(cur_src, cur_dst, bound, thread, ctx)) {
         return false;
       }
     }
@@ -170,7 +175,7 @@
     T* cur_src = src + length - 1;
     T* cur_dst = dst + length - 1;
     for (; cur_src >= src; cur_src--, cur_dst--) {
-      if (!arraycopy_element<T, CHECKCAST, SATB, STOREVAL_MODE>(cur_src, cur_dst, bound, thread)) {
+      if (!arraycopy_element<T, CHECKCAST, SATB, STOREVAL_MODE>(cur_src, cur_dst, bound, thread, ctx)) {
         return false;
       }
     }
@@ -179,14 +184,26 @@
 }
 
 template <typename T, bool CHECKCAST, bool SATB, ShenandoahBarrierSet::ArrayCopyStoreValMode STOREVAL_MODE>
-bool ShenandoahBarrierSet::arraycopy_element(T* cur_src, T* cur_dst, Klass* bound, Thread* thread) {
+bool ShenandoahBarrierSet::arraycopy_element(T* cur_src, T* cur_dst, Klass* bound, Thread* const thread, ShenandoahMarkingContext* const ctx) {
   T o = RawAccess<>::oop_load(cur_src);
 
   if (SATB) {
+    assert(ShenandoahThreadLocalData::satb_mark_queue(thread).is_active(), "Shouldn't be here otherwise");
     T prev = RawAccess<>::oop_load(cur_dst);
     if (!CompressedOops::is_null(prev)) {
       oop prev_obj = CompressedOops::decode_not_null(prev);
-      enqueue(prev_obj);
+      switch (STOREVAL_MODE) {
+      case NONE:
+        break;
+      case READ_BARRIER:
+      case WRITE_BARRIER:
+        // The write-barrier case cannot really happen. It's traversal-only and traversal
+        // doesn't currently use SATB. And even if it did, it would not be fatal to just do the normal RB here.
+        prev_obj = ShenandoahBarrierSet::resolve_forwarded_not_null(prev_obj);
+      }
+      if (!ctx->is_marked(prev_obj)) {
+        ShenandoahThreadLocalData::satb_mark_queue(thread).enqueue_known_active(prev_obj);
+      }
     }
   }
 
--- a/src/hotspot/share/gc/shenandoah/shenandoahRuntime.cpp	Tue Apr 02 08:19:36 2019 -0700
+++ b/src/hotspot/share/gc/shenandoah/shenandoahRuntime.cpp	Tue Apr 02 18:13:42 2019 +0200
@@ -51,7 +51,8 @@
   }
   shenandoah_assert_correct(NULL, orig);
   // store the original value that was in the field reference
-  ShenandoahThreadLocalData::satb_mark_queue(thread).enqueue(orig);
+  assert(ShenandoahThreadLocalData::satb_mark_queue(thread).is_active(), "Shouldn't be here otherwise");
+  ShenandoahThreadLocalData::satb_mark_queue(thread).enqueue_known_active(orig);
 JRT_END
 
 JRT_LEAF(oopDesc*, ShenandoahRuntime::write_barrier_JRT(oopDesc* src))