8175554: Improve G1UpdateRSOrPushRefClosure
author tschatzl
Wed, 28 Jun 2017 10:58:19 +0200
changeset 46572 fef0d64b2263
parent 46571 c70b36f0730d
child 46573 0f8fae16c5b4
8175554: Improve G1UpdateRSOrPushRefClosure
Summary: Micro-optimizations and improving naming for the G1UpdateRSOrPushRefClosure
Reviewed-by: ehelin, sangheki
hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp
hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp
hotspot/src/share/vm/gc/g1/g1CollectedHeap.inline.hpp
hotspot/src/share/vm/gc/g1/g1OopClosures.cpp
hotspot/src/share/vm/gc/g1/g1OopClosures.hpp
hotspot/src/share/vm/gc/g1/g1OopClosures.inline.hpp
hotspot/src/share/vm/gc/g1/g1ParScanThreadState.hpp
hotspot/src/share/vm/gc/g1/g1RemSet.cpp
hotspot/src/share/vm/gc/g1/g1RemSet.hpp
hotspot/src/share/vm/gc/g1/g1RootProcessor.hpp
hotspot/src/share/vm/gc/g1/g1_specialized_oop_closures.hpp
--- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp	Wed Jun 14 11:26:44 2017 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp	Wed Jun 28 10:58:19 2017 +0200
@@ -3458,13 +3458,11 @@
 
       _root_processor->evacuate_roots(pss->closures(), worker_id);
 
-      G1ParPushHeapRSClosure push_heap_rs_cl(_g1h, pss);
-
       // We pass a weak code blobs closure to the remembered set scanning because we want to avoid
       // treating the nmethods visited to act as roots for concurrent marking.
       // We only want to make sure that the oops in the nmethods are adjusted with regard to the
       // objects copied by the current evacuation.
-      _g1h->g1_rem_set()->oops_into_collection_set_do(&push_heap_rs_cl,
+      _g1h->g1_rem_set()->oops_into_collection_set_do(pss,
                                                       pss->closures()->weak_codeblobs(),
                                                       worker_id);
 
--- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp	Wed Jun 14 11:26:44 2017 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp	Wed Jun 28 10:58:19 2017 +0200
@@ -1136,6 +1136,7 @@
   // set. Assumes that the reference points into the heap.
   inline bool is_in_cset(const HeapRegion *hr);
   inline bool is_in_cset(oop obj);
+  inline bool is_in_cset(HeapWord* addr);
 
   inline bool is_in_cset_or_humongous(const oop obj);
 
--- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.inline.hpp	Wed Jun 14 11:26:44 2017 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.inline.hpp	Wed Jun 28 10:58:19 2017 +0200
@@ -139,7 +139,11 @@
 }
 
 inline bool G1CollectedHeap::is_in_cset(oop obj) {
-  return _in_cset_fast_test.is_in_cset((HeapWord*)obj);
+  return is_in_cset((HeapWord*)obj);
+}
+
+inline bool G1CollectedHeap::is_in_cset(HeapWord* addr) {
+  return _in_cset_fast_test.is_in_cset(addr);
 }
 
 bool G1CollectedHeap::is_in_cset(const HeapRegion* hr) {
--- a/hotspot/src/share/vm/gc/g1/g1OopClosures.cpp	Wed Jun 14 11:26:44 2017 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1OopClosures.cpp	Wed Jun 28 10:58:19 2017 +0200
@@ -38,7 +38,7 @@
   _cm(_g1->concurrent_mark())
 { }
 
-G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
+G1ScanClosureBase::G1ScanClosureBase(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
   _g1(g1), _par_scan_state(par_scan_state)
 { }
 
--- a/hotspot/src/share/vm/gc/g1/g1OopClosures.hpp	Wed Jun 14 11:26:44 2017 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1OopClosures.hpp	Wed Jun 28 10:58:19 2017 +0200
@@ -25,6 +25,7 @@
 #ifndef SHARE_VM_GC_G1_G1OOPCLOSURES_HPP
 #define SHARE_VM_GC_G1_G1OOPCLOSURES_HPP
 
+#include "gc/g1/g1InCSetState.hpp"
 #include "memory/iterator.hpp"
 #include "oops/markOop.hpp"
 
@@ -47,34 +48,60 @@
   void set_region(HeapRegion* from) { _from = from; }
 };
 
-class G1ParClosureSuper : public OopsInHeapRegionClosure {
+class G1ScanClosureBase : public OopsInHeapRegionClosure {
 protected:
   G1CollectedHeap* _g1;
   G1ParScanThreadState* _par_scan_state;
 
-  G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state);
-  ~G1ParClosureSuper() { }
+  G1ScanClosureBase(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state);
+  ~G1ScanClosureBase() { }
 
+  template <class T>
+  inline void prefetch_and_push(T* p, oop const obj);
+
+  template <class T>
+  inline void handle_non_cset_obj_common(InCSetState const state, T* p, oop const obj);
 public:
   // This closure needs special handling for InstanceRefKlass.
   virtual ReferenceIterationMode reference_iteration_mode() { return DO_DISCOVERED_AND_DISCOVERY; }
 };
 
-class G1ParPushHeapRSClosure : public G1ParClosureSuper {
+// Used during the Update RS phase to refine remaining cards in the dirty card queues (DCQ) during garbage collection.
+class G1ScanObjsDuringUpdateRSClosure: public G1ScanClosureBase {
+  uint _worker_i;            // Worker id handed to HeapRegionRemSet::add_reference().
+  bool _has_refs_into_cset;  // Set once a scanned reference points into the collection set.
+
 public:
-  G1ParPushHeapRSClosure(G1CollectedHeap* g1,
-                         G1ParScanThreadState* par_scan_state):
-    G1ParClosureSuper(g1, par_scan_state) { }
+  G1ScanObjsDuringUpdateRSClosure(G1CollectedHeap* g1h,
+                                  G1ParScanThreadState* pss,
+                                  uint worker_i) :
+    G1ScanClosureBase(g1h, pss), _worker_i(worker_i), _has_refs_into_cset(false) { } // Initializers follow declaration order.
+
+  void reset_has_refs_into_cset() { _has_refs_into_cset = false; }
+  bool has_refs_into_cset() const { return _has_refs_into_cset; }
+
+  template <class T> void do_oop_nv(T* p);
+  virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
+  virtual void do_oop(oop* p) { do_oop_nv(p); }
+};
+
+// Used during the Scan RS phase to scan cards from the remembered set during garbage collection.
+class G1ScanObjsDuringScanRSClosure : public G1ScanClosureBase {
+public:
+  G1ScanObjsDuringScanRSClosure(G1CollectedHeap* g1,
+                                G1ParScanThreadState* par_scan_state):
+    G1ScanClosureBase(g1, par_scan_state) { }
 
   template <class T> void do_oop_nv(T* p);
   virtual void do_oop(oop* p)          { do_oop_nv(p); }
   virtual void do_oop(narrowOop* p)    { do_oop_nv(p); }
 };
 
-class G1ParScanClosure : public G1ParClosureSuper {
+// This closure is applied to the fields of the objects that have just been copied during evacuation.
+class G1ScanEvacuatedObjClosure : public G1ScanClosureBase {
 public:
-  G1ParScanClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
-    G1ParClosureSuper(g1, par_scan_state) { }
+  G1ScanEvacuatedObjClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
+    G1ScanClosureBase(g1, par_scan_state) { }
 
   template <class T> void do_oop_nv(T* p);
   virtual void do_oop(oop* p)          { do_oop_nv(p); }
@@ -186,42 +213,7 @@
 
   template <class T> void do_oop_nv(T* p);
   virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
-  virtual void do_oop(oop* p) { do_oop_nv(p); }
-};
-
-class G1UpdateRSOrPushRefOopClosure: public ExtendedOopClosure {
-  G1CollectedHeap* _g1;
-  HeapRegion* _from;
-  G1ParPushHeapRSClosure* _push_ref_cl;
-  bool _record_refs_into_cset;
-  uint _worker_i;
-  bool _has_refs_into_cset;
-
-public:
-  G1UpdateRSOrPushRefOopClosure(G1CollectedHeap* g1h,
-                                G1ParPushHeapRSClosure* push_ref_cl,
-                                bool record_refs_into_cset,
-                                uint worker_i = 0);
-
-  void set_from(HeapRegion* from) {
-    assert(from != NULL, "from region must be non-NULL");
-    _from = from;
-  }
-
-  bool self_forwarded(oop obj) {
-    markOop m = obj->mark();
-    bool result = (m->is_marked() && ((oop)m->decode_pointer() == obj));
-    return result;
-  }
-
-  bool has_refs_into_cset() const { return _has_refs_into_cset; }
-
-  template <class T> inline void do_oop_nv(T* p);
-  virtual inline void do_oop(narrowOop* p);
-  virtual inline void do_oop(oop* p);
-
-  // This closure needs special handling for InstanceRefKlass.
-  virtual ReferenceIterationMode reference_iteration_mode() { return DO_DISCOVERED_AND_DISCOVERY; }
+  virtual void do_oop(oop* p)       { do_oop_nv(p); }
 };
 
 #endif // SHARE_VM_GC_G1_G1OOPCLOSURES_HPP
--- a/hotspot/src/share/vm/gc/g1/g1OopClosures.inline.hpp	Wed Jun 14 11:26:44 2017 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1OopClosures.inline.hpp	Wed Jun 28 10:58:19 2017 +0200
@@ -36,61 +36,51 @@
 #include "memory/iterator.inline.hpp"
 #include "runtime/prefetch.inline.hpp"
 
-// This closure is applied to the fields of the objects that have just been copied.
 template <class T>
-inline void G1ParScanClosure::do_oop_nv(T* p) {
-  T heap_oop = oopDesc::load_heap_oop(p);
-
-  if (!oopDesc::is_null(heap_oop)) {
-    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
-    const InCSetState state = _g1->in_cset_state(obj);
-    if (state.is_in_cset()) {
-      // We're not going to even bother checking whether the object is
-      // already forwarded or not, as this usually causes an immediate
-      // stall. We'll try to prefetch the object (for write, given that
-      // we might need to install the forwarding reference) and we'll
-      // get back to it when pop it from the queue
-      Prefetch::write(obj->mark_addr(), 0);
-      Prefetch::read(obj->mark_addr(), (HeapWordSize*2));
+inline void G1ScanClosureBase::prefetch_and_push(T* p, const oop obj) {
+  // We're not going to even bother checking whether the object is
+  // already forwarded or not, as this usually causes an immediate
+  // stall. We'll try to prefetch the object (for write, given that
+  // we might need to install the forwarding reference) and we'll
+  // get back to it when pop it from the queue
+  Prefetch::write(obj->mark_addr(), 0);
+  Prefetch::read(obj->mark_addr(), (HeapWordSize*2));
 
-      // slightly paranoid test; I'm trying to catch potential
-      // problems before we go into push_on_queue to know where the
-      // problem is coming from
-      assert((obj == oopDesc::load_decode_heap_oop(p)) ||
-             (obj->is_forwarded() &&
-                 obj->forwardee() == oopDesc::load_decode_heap_oop(p)),
-             "p should still be pointing to obj or to its forwardee");
+  // slightly paranoid test; I'm trying to catch potential
+  // problems before we go into push_on_queue to know where the
+  // problem is coming from
+  assert((obj == oopDesc::load_decode_heap_oop(p)) ||
+         (obj->is_forwarded() &&
+         obj->forwardee() == oopDesc::load_decode_heap_oop(p)),
+         "p should still be pointing to obj or to its forwardee");
 
-      _par_scan_state->push_on_queue(p);
-    } else {
-      if (state.is_humongous()) {
-        _g1->set_humongous_is_live(obj);
-      } else if (state.is_ext()) {
-        _par_scan_state->do_oop_ext(p);
-      }
-      _par_scan_state->update_rs(_from, p, obj);
-    }
+  _par_scan_state->push_on_queue(p);
+}
+
+template <class T>
+inline void G1ScanClosureBase::handle_non_cset_obj_common(InCSetState const state, T* p, oop const obj) {
+  if (state.is_humongous()) {
+    _g1->set_humongous_is_live(obj);
+  } else if (state.is_ext()) {
+    _par_scan_state->do_oop_ext(p);
   }
 }
 
 template <class T>
-inline void G1ParPushHeapRSClosure::do_oop_nv(T* p) {
+inline void G1ScanEvacuatedObjClosure::do_oop_nv(T* p) {
   T heap_oop = oopDesc::load_heap_oop(p);
 
-  if (!oopDesc::is_null(heap_oop)) {
-    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
-    const InCSetState state = _g1->in_cset_state(obj);
-    if (state.is_in_cset_or_humongous()) {
-      Prefetch::write(obj->mark_addr(), 0);
-      Prefetch::read(obj->mark_addr(), (HeapWordSize*2));
+  if (oopDesc::is_null(heap_oop)) {
+    return;
+  }
+  oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+  const InCSetState state = _g1->in_cset_state(obj);
+  if (state.is_in_cset()) {
+    prefetch_and_push(p, obj);
+  } else {
+    handle_non_cset_obj_common(state, p, obj);
 
-      // Place on the references queue
-      _par_scan_state->push_on_queue(p);
-    } else if (state.is_ext()) {
-      _par_scan_state->do_oop_ext(p);
-    } else {
-      assert(!_g1->is_in_cset(obj), "checking");
-    }
+    _par_scan_state->update_rs(_from, p, obj);
   }
 }
 
@@ -145,10 +135,10 @@
     // Normally this closure should only be called with cross-region references.
     // But since Java threads are manipulating the references concurrently and we
     // reload the values things may have changed.
-    // This check lets slip through references from a humongous continues region
+    // Also this check lets slip through references from a humongous continues region
     // to its humongous start region, as they are in different regions, and adds a
-    // remembered set entry. This is benign (apart from memory usage), as this
-    // closure is never called during evacuation.
+    // remembered set entry. This is benign (apart from memory usage), as we never
+    // try to either evacuate or eagerly reclaim humongous arrays of j.l.O.
     return;
   }
 
@@ -159,79 +149,50 @@
 }
 
 template <class T>
-inline void G1UpdateRSOrPushRefOopClosure::do_oop_nv(T* p) {
-  oop obj = oopDesc::load_decode_heap_oop(p);
-  if (obj == NULL) {
-    return;
-  }
-
-#ifdef ASSERT
-  // can't do because of races
-  // assert(obj == NULL || obj->is_oop(), "expected an oop");
-  assert(check_obj_alignment(obj), "not oop aligned");
-  assert(_g1->is_in_reserved(obj), "must be in heap");
-#endif // ASSERT
-
-  assert(_from != NULL, "from region must be non-NULL");
-  assert(_from->is_in_reserved(p) ||
-         (_from->is_humongous() &&
-          _g1->heap_region_containing(p)->is_humongous() &&
-          _from->humongous_start_region() == _g1->heap_region_containing(p)->humongous_start_region()),
-         "p " PTR_FORMAT " is not in the same region %u or part of the correct humongous object starting at region %u.",
-         p2i(p), _from->hrm_index(), _from->humongous_start_region()->hrm_index());
-
-  HeapRegion* to = _g1->heap_region_containing(obj);
-  if (_from == to) {
-    // Normally this closure should only be called with cross-region references.
-    // But since Java threads are manipulating the references concurrently and we
-    // reload the values things may have changed.
-    // Also this check lets slip through references from a humongous continues region
-    // to its humongous start region, as they are in different regions, and adds a
-    // remembered set entry. This is benign (apart from memory usage), as we never
-    // try to either evacuate or eager reclaim these kind of regions.
+inline void G1ScanObjsDuringUpdateRSClosure::do_oop_nv(T* p) {
+  T o = oopDesc::load_heap_oop(p);
+  if (oopDesc::is_null(o)) {
     return;
   }
+  oop obj = oopDesc::decode_heap_oop_not_null(o);
 
-  // The _record_refs_into_cset flag is true during the RSet
-  // updating part of an evacuation pause. It is false at all
-  // other times:
-  //  * rebuilding the remembered sets after a full GC
-  //  * during concurrent refinement.
-  //  * updating the remembered sets of regions in the collection
-  //    set in the event of an evacuation failure (when deferred
-  //    updates are enabled).
+  check_obj_during_refinement(p, obj);
 
-  if (_record_refs_into_cset && to->in_collection_set()) {
-    // We are recording references that point into the collection
-    // set and this particular reference does exactly that...
-    // If the referenced object has already been forwarded
-    // to itself, we are handling an evacuation failure and
-    // we have already visited/tried to copy this object
-    // there is no need to retry.
-    if (!self_forwarded(obj)) {
-    assert(_push_ref_cl != NULL, "should not be null");
-    // Push the reference in the refs queue of the G1ParScanThreadState
-    // instance for this worker thread.
-      _push_ref_cl->do_oop(p);
+  assert(!_g1->is_in_cset((HeapWord*)p), "Oop originates from " PTR_FORMAT " (region: %u) which is in the collection set.", p2i(p), _g1->addr_to_region((HeapWord*)p));
+  const InCSetState state = _g1->in_cset_state(obj);
+  if (state.is_in_cset()) {
+    // Since the source is always from outside the collection set, here we implicitly know
+    // that this is a cross-region reference too.
+    prefetch_and_push(p, obj);
+
+    _has_refs_into_cset = true;
+  } else {
+    HeapRegion* to = _g1->heap_region_containing(obj);
+    if (_from == to) {
+      return;
     }
-    _has_refs_into_cset = true;
 
-    // Deferred updates to the CSet are either discarded (in the normal case),
-    // or processed (if an evacuation failure occurs) at the end
-    // of the collection.
-    // See G1RemSet::cleanup_after_oops_into_collection_set_do().
-  } else {
-    // We either don't care about pushing references that point into the
-    // collection set (i.e. we're not during an evacuation pause) _or_
-    // the reference doesn't point into the collection set. Either way
-    // we add the reference directly to the RSet of the region containing
-    // the referenced object.
-    assert(to->rem_set() != NULL, "Need per-region 'into' remsets.");
+    handle_non_cset_obj_common(state, p, obj);
+
     to->rem_set()->add_reference(p, _worker_i);
   }
 }
-void G1UpdateRSOrPushRefOopClosure::do_oop(oop* p)       { do_oop_nv(p); }
-void G1UpdateRSOrPushRefOopClosure::do_oop(narrowOop* p) { do_oop_nv(p); }
+
+template <class T>
+inline void G1ScanObjsDuringScanRSClosure::do_oop_nv(T* p) {
+  T heap_oop = oopDesc::load_heap_oop(p);
+  if (oopDesc::is_null(heap_oop)) {
+    return;
+  }
+  oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+
+  const InCSetState state = _g1->in_cset_state(obj);
+  if (state.is_in_cset()) {
+    prefetch_and_push(p, obj);
+  } else {
+    handle_non_cset_obj_common(state, p, obj);
+  }
+}
 
 template <class T>
 void G1ParCopyHelper::do_klass_barrier(T* p, oop new_obj) {
--- a/hotspot/src/share/vm/gc/g1/g1ParScanThreadState.hpp	Wed Jun 14 11:26:44 2017 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1ParScanThreadState.hpp	Wed Jun 28 10:58:19 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -54,7 +54,7 @@
   InCSetState       _dest[InCSetState::Num];
   // Local tenuring threshold.
   uint              _tenuring_threshold;
-  G1ParScanClosure  _scanner;
+  G1ScanEvacuatedObjClosure  _scanner;
 
   int  _hash_seed;
   uint _worker_id;
--- a/hotspot/src/share/vm/gc/g1/g1RemSet.cpp	Wed Jun 14 11:26:44 2017 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1RemSet.cpp	Wed Jun 28 10:58:19 2017 +0200
@@ -327,11 +327,11 @@
 }
 
 G1ScanRSClosure::G1ScanRSClosure(G1RemSetScanState* scan_state,
-                                 G1ParPushHeapRSClosure* push_heap_cl,
+                                 G1ScanObjsDuringScanRSClosure* scan_obj_on_card,
                                  CodeBlobClosure* code_root_cl,
                                  uint worker_i) :
   _scan_state(scan_state),
-  _push_heap_cl(push_heap_cl),
+  _scan_objs_on_card_cl(scan_obj_on_card),
   _code_root_cl(code_root_cl),
   _strong_code_root_scan_time_sec(0.0),
   _cards_claimed(0),
@@ -353,8 +353,8 @@
     // but they're benign), which reduces the number of duplicate
     // scans (the rsets of the regions in the cset can intersect).
     _ct_bs->set_card_claimed(index);
-    _push_heap_cl->set_region(r);
-    r->oops_on_card_seq_iterate_careful<true>(mr, _push_heap_cl);
+    _scan_objs_on_card_cl->set_region(r);
+    r->oops_on_card_seq_iterate_careful<true>(mr, _scan_objs_on_card_cl);
     _cards_scanned++;
   }
 }
@@ -413,12 +413,13 @@
   return false;
 }
 
-void G1RemSet::scan_rem_set(G1ParPushHeapRSClosure* oops_in_heap_closure,
+void G1RemSet::scan_rem_set(G1ParScanThreadState* pss,
                             CodeBlobClosure* heap_region_codeblobs,
                             uint worker_i) {
   double rs_time_start = os::elapsedTime();
 
-  G1ScanRSClosure cl(_scan_state, oops_in_heap_closure, heap_region_codeblobs, worker_i);
+  G1ScanObjsDuringScanRSClosure scan_cl(_g1, pss);
+  G1ScanRSClosure cl(_scan_state, &scan_cl, heap_region_codeblobs, worker_i);
   _g1->collection_set_iterate_from(&cl, worker_i);
 
   double scan_rs_time_sec = (os::elapsedTime() - rs_time_start) -
@@ -441,12 +442,12 @@
 class RefineRecordRefsIntoCSCardTableEntryClosure: public CardTableEntryClosure {
   G1RemSet* _g1rs;
   DirtyCardQueue* _into_cset_dcq;
-  G1ParPushHeapRSClosure* _cl;
+  G1ScanObjsDuringUpdateRSClosure* _update_rs_cl;
 public:
   RefineRecordRefsIntoCSCardTableEntryClosure(G1CollectedHeap* g1h,
                                               DirtyCardQueue* into_cset_dcq,
-                                              G1ParPushHeapRSClosure* cl) :
-    _g1rs(g1h->g1_rem_set()), _into_cset_dcq(into_cset_dcq), _cl(cl)
+                                              G1ScanObjsDuringUpdateRSClosure* update_rs_cl) :
+    _g1rs(g1h->g1_rem_set()), _into_cset_dcq(into_cset_dcq), _update_rs_cl(update_rs_cl)
   {}
 
   bool do_card_ptr(jbyte* card_ptr, uint worker_i) {
@@ -455,9 +456,8 @@
     // is during RSet updating within an evacuation pause.
     // In this case worker_i should be the id of a GC worker thread.
     assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause");
-    assert(worker_i < ParallelGCThreads, "should be a GC worker");
 
-    if (_g1rs->refine_card_during_gc(card_ptr, worker_i, _cl)) {
+    if (_g1rs->refine_card_during_gc(card_ptr, _update_rs_cl)) {
       // 'card_ptr' contains references that point into the collection
       // set. We need to record the card in the DCQS
       // (_into_cset_dirty_card_queue_set)
@@ -471,9 +471,10 @@
 };
 
 void G1RemSet::update_rem_set(DirtyCardQueue* into_cset_dcq,
-                              G1ParPushHeapRSClosure* oops_in_heap_closure,
+                              G1ParScanThreadState* pss,
                               uint worker_i) {
-  RefineRecordRefsIntoCSCardTableEntryClosure into_cset_update_rs_cl(_g1, into_cset_dcq, oops_in_heap_closure);
+  G1ScanObjsDuringUpdateRSClosure update_rs_cl(_g1, pss, worker_i);
+  RefineRecordRefsIntoCSCardTableEntryClosure into_cset_update_rs_cl(_g1, into_cset_dcq, &update_rs_cl);
 
   G1GCParPhaseTimesTracker x(_g1p->phase_times(), G1GCPhaseTimes::UpdateRS, worker_i);
   if (G1HotCardCache::default_use_cache()) {
@@ -489,7 +490,7 @@
   HeapRegionRemSet::cleanup();
 }
 
-void G1RemSet::oops_into_collection_set_do(G1ParPushHeapRSClosure* cl,
+void G1RemSet::oops_into_collection_set_do(G1ParScanThreadState* pss,
                                            CodeBlobClosure* heap_region_codeblobs,
                                            uint worker_i) {
   // A DirtyCardQueue that is used to hold cards containing references
@@ -503,8 +504,8 @@
   // DirtyCardQueueSet that is used to manage RSet updates
   DirtyCardQueue into_cset_dcq(&_into_cset_dirty_card_queue_set);
 
-  update_rem_set(&into_cset_dcq, cl, worker_i);
-  scan_rem_set(cl, heap_region_codeblobs, worker_i);;
+  update_rem_set(&into_cset_dcq, pss, worker_i);
+  scan_rem_set(pss, heap_region_codeblobs, worker_i);;
 }
 
 void G1RemSet::prepare_for_oops_into_collection_set_do() {
@@ -579,17 +580,6 @@
 #endif
 }
 
-G1UpdateRSOrPushRefOopClosure::G1UpdateRSOrPushRefOopClosure(G1CollectedHeap* g1h,
-                                                             G1ParPushHeapRSClosure* push_ref_cl,
-                                                             bool record_refs_into_cset,
-                                                             uint worker_i) :
-  _g1(g1h),
-  _from(NULL),
-  _record_refs_into_cset(record_refs_into_cset),
-  _has_refs_into_cset(false),
-  _push_ref_cl(push_ref_cl),
-  _worker_i(worker_i) { }
-
 void G1RemSet::refine_card_concurrently(jbyte* card_ptr,
                                         uint worker_i) {
   assert(!_g1->is_gc_active(), "Only call concurrently");
@@ -740,8 +730,7 @@
 }
 
 bool G1RemSet::refine_card_during_gc(jbyte* card_ptr,
-                                     uint worker_i,
-                                     G1ParPushHeapRSClosure*  oops_in_heap_closure) {
+                                     G1ScanObjsDuringUpdateRSClosure* update_rs_cl) {
   assert(_g1->is_gc_active(), "Only call during GC");
 
   check_card_ptr(card_ptr, _ct_bs);
@@ -775,19 +764,14 @@
   MemRegion dirty_region(card_start, MIN2(scan_limit, card_end));
   assert(!dirty_region.is_empty(), "sanity");
 
-  G1UpdateRSOrPushRefOopClosure update_rs_oop_cl(_g1,
-                                                 oops_in_heap_closure,
-                                                 true,
-                                                 worker_i);
-  update_rs_oop_cl.set_from(r);
+  update_rs_cl->set_region(r);
+  update_rs_cl->reset_has_refs_into_cset();
 
-  bool card_processed =
-    r->oops_on_card_seq_iterate_careful<true>(dirty_region,
-                                              &update_rs_oop_cl);
+  bool card_processed = r->oops_on_card_seq_iterate_careful<true>(dirty_region, update_rs_cl);
   assert(card_processed, "must be");
   _conc_refine_cards++;
 
-  return update_rs_oop_cl.has_refs_into_cset();
+  return update_rs_cl->has_refs_into_cset();
 }
 
 void G1RemSet::print_periodic_summary_info(const char* header, uint period_count) {
--- a/hotspot/src/share/vm/gc/g1/g1RemSet.hpp	Wed Jun 14 11:26:44 2017 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1RemSet.hpp	Wed Jun 28 10:58:19 2017 +0200
@@ -41,10 +41,12 @@
 class CodeBlobClosure;
 class G1CollectedHeap;
 class G1HotCardCache;
-class G1ParPushHeapRSClosure;
 class G1RemSetScanState;
+class G1ParScanThreadState;
 class G1Policy;
 class G1SATBCardTableModRefBS;
+class G1ScanObjsDuringScanRSClosure;
+class G1ScanObjsDuringUpdateRSClosure;
 class HeapRegionClaimer;
 
 // A G1RemSet in which each heap region has a rem set that records the
@@ -94,21 +96,12 @@
            G1HotCardCache* hot_card_cache);
   ~G1RemSet();
 
-  // Invoke "cl->do_oop" on all pointers into the collection set
-  // from objects in regions outside the collection set (having
-  // invoked "cl->set_region" to set the "from" region correctly
-  // beforehand.)
+  // Process all oops in the collection set from the cards in the refinement buffers and
+  // remembered sets using pss.
   //
-  // Apply non_heap_roots on the oops of the unmarked nmethods
-  // on the strong code roots list for each region in the
-  // collection set.
-  //
-  // The "worker_i" param is for the parallel case where the id
-  // of the worker thread calling this function can be helpful in
-  // partitioning the work to be done. It should be the same as
-  // the "i" passed to the calling thread's work(i) function.
-  // In the sequential case this param will be ignored.
-  void oops_into_collection_set_do(G1ParPushHeapRSClosure* cl,
+  // Further applies heap_region_codeblobs on the oops of the unmarked nmethods on the strong code
+  // roots list for each region in the collection set.
+  void oops_into_collection_set_do(G1ParScanThreadState* pss,
                                    CodeBlobClosure* heap_region_codeblobs,
                                    uint worker_i);
 
@@ -120,15 +113,14 @@
   void prepare_for_oops_into_collection_set_do();
   void cleanup_after_oops_into_collection_set_do();
 
-  void scan_rem_set(G1ParPushHeapRSClosure* oops_in_heap_closure,
+  void scan_rem_set(G1ParScanThreadState* pss,
                     CodeBlobClosure* heap_region_codeblobs,
                     uint worker_i);
 
   G1RemSetScanState* scan_state() const { return _scan_state; }
 
-  // Flush remaining refinement buffers into the remembered set,
-  // applying oops_in_heap_closure on the references found.
-  void update_rem_set(DirtyCardQueue* into_cset_dcq, G1ParPushHeapRSClosure* oops_in_heap_closure, uint worker_i);
+  // Flush remaining refinement buffers into the remembered set.
+  void update_rem_set(DirtyCardQueue* into_cset_dcq, G1ParScanThreadState* pss, uint worker_i);
 
   // Record, if necessary, the fact that *p (where "p" is in region "from",
   // which is required to be non-NULL) has changed to a new non-NULL value.
@@ -149,8 +141,7 @@
   // Refine the card corresponding to "card_ptr". Returns "true" if the given card contains
   // oops that have references into the current collection set.
   bool refine_card_during_gc(jbyte* card_ptr,
-                             uint worker_i,
-                             G1ParPushHeapRSClosure* oops_in_heap_closure);
+                             G1ScanObjsDuringUpdateRSClosure* update_rs_cl);
 
   // Print accumulated summary info from the start of the VM.
   void print_summary_info();
@@ -188,7 +179,7 @@
 
   G1CollectedHeap* _g1h;
 
-  G1ParPushHeapRSClosure* _push_heap_cl;
+  G1ScanObjsDuringScanRSClosure* _scan_objs_on_card_cl;
   CodeBlobClosure* _code_root_cl;
 
   G1BlockOffsetTable* _bot;
@@ -202,7 +193,7 @@
   void scan_strong_code_roots(HeapRegion* r);
 public:
   G1ScanRSClosure(G1RemSetScanState* scan_state,
-                  G1ParPushHeapRSClosure* push_heap_cl,
+                  G1ScanObjsDuringScanRSClosure* scan_obj_on_card,
                   CodeBlobClosure* code_root_cl,
                   uint worker_i);
 
--- a/hotspot/src/share/vm/gc/g1/g1RootProcessor.hpp	Wed Jun 14 11:26:44 2017 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1RootProcessor.hpp	Wed Jun 28 10:58:19 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,7 +34,6 @@
 class G1CollectedHeap;
 class G1EvacuationRootClosures;
 class G1GCPhaseTimes;
-class G1ParPushHeapRSClosure;
 class G1RootClosures;
 class Monitor;
 class OopClosure;
--- a/hotspot/src/share/vm/gc/g1/g1_specialized_oop_closures.hpp	Wed Jun 14 11:26:44 2017 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1_specialized_oop_closures.hpp	Wed Jun 28 10:58:19 2017 +0200
@@ -32,19 +32,19 @@
 
 // Forward declarations.
 
-class G1ParScanClosure;
-class G1ParPushHeapRSClosure;
+class G1ScanEvacuatedObjClosure;
 
-class G1UpdateRSOrPushRefOopClosure;
+class G1ScanObjsDuringUpdateRSClosure;
+class G1ScanObjsDuringScanRSClosure;
 class G1ConcurrentRefineOopClosure;
 
 class G1CMOopClosure;
 class G1RootRegionScanClosure;
 
 #define SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_G1(f) \
-      f(G1ParScanClosure,_nv)                      \
-      f(G1ParPushHeapRSClosure,_nv)                \
-      f(G1UpdateRSOrPushRefOopClosure,_nv)         \
+      f(G1ScanEvacuatedObjClosure,_nv)             \
+      f(G1ScanObjsDuringUpdateRSClosure,_nv)       \
+      f(G1ScanObjsDuringScanRSClosure,_nv)         \
       f(G1ConcurrentRefineOopClosure,_nv)          \
       f(G1CMOopClosure,_nv)                        \
       f(G1RootRegionScanClosure,_nv)