8153503: Move remset scan iteration claim to remset local data structure
author tschatzl
Mon, 18 Apr 2016 16:51:14 +0200
changeset 37988 bf4018edea5e
parent 37987 7b0667c9e794
child 37989 e0a0f4de45c4
8153503: Move remset scan iteration claim to remset local data structure
Reviewed-by: drwhite, mgerdin
hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp
hotspot/src/share/vm/gc/g1/g1EvacFailure.cpp
hotspot/src/share/vm/gc/g1/g1RemSet.cpp
hotspot/src/share/vm/gc/g1/g1RemSet.hpp
hotspot/src/share/vm/gc/g1/heapRegionRemSet.cpp
hotspot/src/share/vm/gc/g1/heapRegionRemSet.hpp
hotspot/src/share/vm/gc/g1/heapRegionSet.cpp
--- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp	Thu Apr 14 10:57:09 2016 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp	Mon Apr 18 16:51:14 2016 +0200
@@ -98,7 +98,7 @@
   RefineCardTableEntryClosure() : _concurrent(true) { }
 
   bool do_card_ptr(jbyte* card_ptr, uint worker_i) {
-    bool oops_into_cset = G1CollectedHeap::heap()->g1_rem_set()->refine_card(card_ptr, worker_i, false);
+    bool oops_into_cset = G1CollectedHeap::heap()->g1_rem_set()->refine_card(card_ptr, worker_i, NULL);
     // This path is executed by the concurrent refine or mutator threads,
     // concurrently, and so we do not care if card_ptr contains references
     // that point into the collection set.
@@ -3090,28 +3090,6 @@
     }
 };
 
-#ifdef ASSERT
-class VerifyCSetClosure: public HeapRegionClosure {
-public:
-  bool doHeapRegion(HeapRegion* hr) {
-    // Here we check that the CSet region's RSet is ready for parallel
-    // iteration. The fields that we'll verify are only manipulated
-    // when the region is part of a CSet and is collected. Afterwards,
-    // we reset these fields when we clear the region's RSet (when the
-    // region is freed) so they are ready when the region is
-    // re-allocated. The only exception to this is if there's an
-    // evacuation failure and instead of freeing the region we leave
-    // it in the heap. In that case, we reset these fields during
-    // evacuation failure handling.
-    guarantee(hr->rem_set()->verify_ready_for_par_iteration(), "verification");
-
-    // Here's a good place to add any other checks we'd like to
-    // perform on CSet regions.
-    return false;
-  }
-};
-#endif // ASSERT
-
 uint G1CollectedHeap::num_task_queues() const {
   return _task_queues->size();
 }
@@ -3354,11 +3332,6 @@
           }
         }
 
-#ifdef ASSERT
-        VerifyCSetClosure cl;
-        collection_set_iterate(&cl);
-#endif // ASSERT
-
         // Initialize the GC alloc regions.
         _allocator->init_gc_alloc_regions(evacuation_info);
 
--- a/hotspot/src/share/vm/gc/g1/g1EvacFailure.cpp	Thu Apr 14 10:57:09 2016 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1EvacFailure.cpp	Mon Apr 18 16:51:14 2016 +0200
@@ -227,15 +227,6 @@
                                                during_conc_mark);
         _g1h->verifier()->check_bitmaps("Self-Forwarding Ptr Removal", hr);
 
-        // In the common case (i.e. when there is no evacuation
-        // failure) we make sure that the following is done when
-        // the region is freed so that it is "ready-to-go" when it's
-        // re-allocated. However, when evacuation failure happens, a
-        // region will remain in the heap and might ultimately be added
-        // to a CSet in the future. So we have to be careful here and
-        // make sure the region's RSet is ready for parallel iteration
-        // whenever this might be required in the future.
-        hr->rem_set()->reset_for_par_iteration();
         hr->reset_bot();
 
         size_t live_bytes = remove_self_forward_ptr_by_walking_hr(hr, during_initial_mark);
--- a/hotspot/src/share/vm/gc/g1/g1RemSet.cpp	Thu Apr 14 10:57:09 2016 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1RemSet.cpp	Mon Apr 18 16:51:14 2016 +0200
@@ -45,20 +45,108 @@
 #include "utilities/intHisto.hpp"
 #include "utilities/stack.inline.hpp"
 
+// Tracks, per heap region, the progress of the remembered set scan during an evacuation.
+class G1RemSetScanState : public CHeapObj<mtGC> {
+private:
+  size_t _max_regions; // Number of entries allocated for each of the two arrays below.
+
+  // Scan progress for the remembered set of a single region. Transitions from
+  // Unclaimed -> Claimed -> Complete.
+  // At each of the transitions the thread that does the transition needs to perform
+  // some special action once. This is the reason for the extra "Claimed" state.
+  typedef jint G1RemsetIterState;
+
+  static const G1RemsetIterState Unclaimed = 0; // The remembered set has not been scanned yet.
+  static const G1RemsetIterState Claimed = 1;   // The remembered set is currently being scanned.
+  static const G1RemsetIterState Complete = 2;  // The remembered set has been completely scanned.
+
+  G1RemsetIterState volatile* _iter_states; // Scan progress per region, indexed by region number.
+  // The current location where the next thread should continue scanning in a region's
+  // remembered set.
+  size_t volatile* _iter_claims;
+
+public:
+  G1RemSetScanState() :
+    _max_regions(0),
+    _iter_states(NULL),
+    _iter_claims(NULL) {
+    // Nothing is allocated here; initialize() sets up the arrays.
+  }
+
+  ~G1RemSetScanState() {
+    if (_iter_states != NULL) {
+      FREE_C_HEAP_ARRAY(G1RemsetIterState, _iter_states);
+    }
+    if (_iter_claims != NULL) {
+      FREE_C_HEAP_ARRAY(size_t, _iter_claims);
+    }
+  }
+  // Allocates both arrays with max_regions entries each; reset() assigns the initial values.
+  void initialize(uint max_regions) {
+    assert(_iter_states == NULL, "Must not be initialized twice");
+    assert(_iter_claims == NULL, "Must not be initialized twice");
+    _max_regions = max_regions;
+    _iter_states = NEW_C_HEAP_ARRAY(G1RemsetIterState, max_regions, mtGC);
+    _iter_claims = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
+  }
+  // Sets every region's state to Unclaimed and every claim position to zero.
+  void reset() {
+    for (uint i = 0; i < _max_regions; i++) {
+      _iter_states[i] = Unclaimed;
+    }
+    memset((void*)_iter_claims, 0, _max_regions * sizeof(size_t));
+  }
+
+  // Attempt to claim the remembered set of the region for iteration. Returns true
+  // if this call caused the transition from Unclaimed to Claimed.
+  inline bool claim_iter(uint region) {
+    assert(region < _max_regions, "Tried to access invalid region %u", region);
+    if (_iter_states[region] != Unclaimed) { // Plain read first: skips the cmpxchg if no longer Unclaimed.
+      return false;
+    }
+    jint res = Atomic::cmpxchg(Claimed, (jint*)(&_iter_states[region]), Unclaimed);
+    return (res == Unclaimed);
+  }
+
+  // Tries to atomically move the iteration state from "Claimed" to "Complete". Returns
+  // true for the thread that caused the transition.
+  inline bool set_iter_complete(uint region) {
+    if (iter_is_complete(region)) {
+      return false;
+    }
+    jint res = Atomic::cmpxchg(Complete, (jint*)(&_iter_states[region]), Claimed);
+    return (res == Claimed);
+  }
+
+  // Returns true if the region's iteration is complete.
+  inline bool iter_is_complete(uint region) const {
+    assert(region < _max_regions, "Tried to access invalid region %u", region);
+    return _iter_states[region] == Complete;
+  }
+
+  // The current position within the remembered set of the given region.
+  inline size_t iter_claimed(uint region) const {
+    assert(region < _max_regions, "Tried to access invalid region %u", region);
+    return _iter_claims[region];
+  }
+
+  // Claim the next block of "step" cards within the remembered set of the region.
+  // Returns the claim position from before the add, i.e. where the claimed block starts.
+  inline size_t iter_claimed_next(uint region, size_t step) {
+    return Atomic::add(step, &_iter_claims[region]) - step;
+  }
+};
+
 G1RemSet::G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs) :
   _g1(g1),
+  _scan_state(new G1RemSetScanState()),
   _conc_refine_cards(0),
   _ct_bs(ct_bs),
   _g1p(_g1->g1_policy()),
   _cg1r(g1->concurrent_g1_refine()),
-  _cset_rs_update_cl(NULL),
   _prev_period_summary(),
   _into_cset_dirty_card_queue_set(false)
 {
-  _cset_rs_update_cl = NEW_C_HEAP_ARRAY(G1ParPushHeapRSClosure*, n_workers(), mtGC);
-  for (uint i = 0; i < n_workers(); i++) {
-    _cset_rs_update_cl[i] = NULL;
-  }
   if (log_is_enabled(Trace, gc, remset)) {
     _prev_period_summary.initialize(this);
   }
@@ -74,10 +162,9 @@
 }
 
 G1RemSet::~G1RemSet() {
-  for (uint i = 0; i < n_workers(); i++) {
-    assert(_cset_rs_update_cl[i] == NULL, "it should be");
+  if (_scan_state != NULL) {
+    delete _scan_state;
   }
-  FREE_C_HEAP_ARRAY(G1ParPushHeapRSClosure*, _cset_rs_update_cl);
 }
 
 uint G1RemSet::num_par_rem_sets() {
@@ -86,6 +173,7 @@
 
 void G1RemSet::initialize(size_t capacity, uint max_regions) {
   G1FromCardCache::initialize(num_par_rem_sets(), max_regions);
+  _scan_state->initialize(max_regions);
   {
     GCTraceTime(Debug, gc, marking)("Initialize Card Live Data");
     _card_live_data.initialize(capacity, max_regions);
@@ -96,29 +184,29 @@
   }
 }
 
-ScanRSClosure::ScanRSClosure(G1ParPushHeapRSClosure* oc,
-                             CodeBlobClosure* code_root_cl,
-                             uint worker_i) :
-  _oc(oc),
+G1ScanRSClosure::G1ScanRSClosure(G1RemSetScanState* scan_state,
+                                 G1ParPushHeapRSClosure* push_heap_cl,
+                                 CodeBlobClosure* code_root_cl,
+                                 uint worker_i) :
+  _scan_state(scan_state),
+  _push_heap_cl(push_heap_cl),
   _code_root_cl(code_root_cl),
   _strong_code_root_scan_time_sec(0.0),
   _cards(0),
   _cards_done(0),
-  _worker_i(worker_i),
-  _try_claimed(false) {
+  _worker_i(worker_i) {
   _g1h = G1CollectedHeap::heap();
   _bot = _g1h->bot();
   _ct_bs = _g1h->g1_barrier_set();
   _block_size = MAX2<size_t>(G1RSetScanBlockSize, 1);
 }
 
-void ScanRSClosure::scanCard(size_t index, HeapRegion *r) {
+void G1ScanRSClosure::scan_card(size_t index, HeapRegion *r) {
   // Stack allocate the DirtyCardToOopClosure instance
-  HeapRegionDCTOC cl(_g1h, r, _oc,
-      CardTableModRefBS::Precise);
+  HeapRegionDCTOC cl(_g1h, r, _push_heap_cl, CardTableModRefBS::Precise);
 
   // Set the "from" region in the closure.
-  _oc->set_region(r);
+  _push_heap_cl->set_region(r);
   MemRegion card_region(_bot->address_for_index(index), BOTConstants::N_words);
   MemRegion pre_gc_allocated(r->bottom(), r->scan_top());
   MemRegion mr = pre_gc_allocated.intersection(card_region);
@@ -132,37 +220,39 @@
   }
 }
 
-void ScanRSClosure::scan_strong_code_roots(HeapRegion* r) {
+void G1ScanRSClosure::scan_strong_code_roots(HeapRegion* r) {
   double scan_start = os::elapsedTime();
   r->strong_code_roots_do(_code_root_cl);
   _strong_code_root_scan_time_sec += (os::elapsedTime() - scan_start);
 }
 
-bool ScanRSClosure::doHeapRegion(HeapRegion* r) {
+bool G1ScanRSClosure::doHeapRegion(HeapRegion* r) {
   assert(r->in_collection_set(), "should only be called on elements of CS.");
-  HeapRegionRemSet* hrrs = r->rem_set();
-  if (hrrs->iter_is_complete()) return false; // All done.
-  if (!_try_claimed && !hrrs->claim_iter()) return false;
-  // If we ever free the collection set concurrently, we should also
-  // clear the card table concurrently therefore we won't need to
-  // add regions of the collection set to the dirty cards region.
-  _g1h->push_dirty_cards_region(r);
-  // If we didn't return above, then
-  //   _try_claimed || r->claim_iter()
-  // is true: either we're supposed to work on claimed-but-not-complete
-  // regions, or we successfully claimed the region.
+  uint region_idx = r->hrm_index();
 
-  HeapRegionRemSetIterator iter(hrrs);
+  if (_scan_state->iter_is_complete(region_idx)) {
+    return false;
+  }
+  if (_scan_state->claim_iter(region_idx)) {
+    // If we ever free the collection set concurrently, we should also
+    // clear the card table concurrently therefore we won't need to
+    // add regions of the collection set to the dirty cards region.
+    _g1h->push_dirty_cards_region(r);
+  }
+
+  HeapRegionRemSetIterator iter(r->rem_set());
   size_t card_index;
 
   // We claim cards in block so as to reduce the contention. The block size is determined by
   // the G1RSetScanBlockSize parameter.
-  size_t jump_to_card = hrrs->iter_claimed_next(_block_size);
+  size_t claimed_card_block = _scan_state->iter_claimed_next(region_idx, _block_size);
   for (size_t current_card = 0; iter.has_next(card_index); current_card++) {
-    if (current_card >= jump_to_card + _block_size) {
-      jump_to_card = hrrs->iter_claimed_next(_block_size);
+    if (current_card >= claimed_card_block + _block_size) {
+      claimed_card_block = _scan_state->iter_claimed_next(region_idx, _block_size);
     }
-    if (current_card < jump_to_card) continue;
+    if (current_card < claimed_card_block) {
+      continue;
+    }
     HeapWord* card_start = _g1h->bot()->address_for_index(card_index);
 
     HeapRegion* card_region = _g1h->heap_region_containing(card_start);
@@ -175,38 +265,33 @@
     // If the card is dirty, then we will scan it during updateRS.
     if (!card_region->in_collection_set() &&
         !_ct_bs->is_card_dirty(card_index)) {
-      scanCard(card_index, card_region);
+      scan_card(card_index, card_region);
     }
   }
-  if (!_try_claimed) {
+  if (_scan_state->set_iter_complete(region_idx)) {
     // Scan the strong code root list attached to the current region
     scan_strong_code_roots(r);
-
-    hrrs->set_iter_complete();
   }
   return false;
 }
 
-size_t G1RemSet::scanRS(G1ParPushHeapRSClosure* oc,
-                        CodeBlobClosure* heap_region_codeblobs,
-                        uint worker_i) {
+size_t G1RemSet::scan_rem_set(G1ParPushHeapRSClosure* oops_in_heap_closure,
+                              CodeBlobClosure* heap_region_codeblobs,
+                              uint worker_i) {
   double rs_time_start = os::elapsedTime();
 
   HeapRegion *startRegion = _g1->start_cset_region_for_worker(worker_i);
 
-  ScanRSClosure scanRScl(oc, heap_region_codeblobs, worker_i);
+  G1ScanRSClosure cl(_scan_state, oops_in_heap_closure, heap_region_codeblobs, worker_i);
+  _g1->collection_set_iterate_from(startRegion, &cl);
 
-  _g1->collection_set_iterate_from(startRegion, &scanRScl);
-  scanRScl.set_try_claimed();
-  _g1->collection_set_iterate_from(startRegion, &scanRScl);
-
-  double scan_rs_time_sec = (os::elapsedTime() - rs_time_start)
-                            - scanRScl.strong_code_root_scan_time_sec();
+  double scan_rs_time_sec = (os::elapsedTime() - rs_time_start) -
+                            cl.strong_code_root_scan_time_sec(); // Code root time is recorded separately below.
 
   _g1p->phase_times()->record_time_secs(G1GCPhaseTimes::ScanRS, worker_i, scan_rs_time_sec);
-  _g1p->phase_times()->record_time_secs(G1GCPhaseTimes::CodeRoots, worker_i, scanRScl.strong_code_root_scan_time_sec());
+  _g1p->phase_times()->record_time_secs(G1GCPhaseTimes::CodeRoots, worker_i, cl.strong_code_root_scan_time_sec());
 
-  return scanRScl.cards_done();
+  return cl.cards_done();
 }
 
 // Closure used for updating RSets and recording references that
@@ -216,10 +301,12 @@
 class RefineRecordRefsIntoCSCardTableEntryClosure: public CardTableEntryClosure {
   G1RemSet* _g1rs;
   DirtyCardQueue* _into_cset_dcq;
+  G1ParPushHeapRSClosure* _cl;
 public:
   RefineRecordRefsIntoCSCardTableEntryClosure(G1CollectedHeap* g1h,
-                                              DirtyCardQueue* into_cset_dcq) :
-    _g1rs(g1h->g1_rem_set()), _into_cset_dcq(into_cset_dcq)
+                                              DirtyCardQueue* into_cset_dcq,
+                                              G1ParPushHeapRSClosure* cl) :
+    _g1rs(g1h->g1_rem_set()), _into_cset_dcq(into_cset_dcq), _cl(cl)
   {}
 
   bool do_card_ptr(jbyte* card_ptr, uint worker_i) {
@@ -230,7 +317,7 @@
     assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause");
     assert(worker_i < ParallelGCThreads, "should be a GC worker");
 
-    if (_g1rs->refine_card(card_ptr, worker_i, true)) {
+    if (_g1rs->refine_card(card_ptr, worker_i, _cl)) {
       // 'card_ptr' contains references that point into the collection
       // set. We need to record the card in the DCQS
       // (_into_cset_dirty_card_queue_set)
@@ -243,8 +330,10 @@
   }
 };
 
-void G1RemSet::updateRS(DirtyCardQueue* into_cset_dcq, uint worker_i) {
-  RefineRecordRefsIntoCSCardTableEntryClosure into_cset_update_rs_cl(_g1, into_cset_dcq);
+void G1RemSet::update_rem_set(DirtyCardQueue* into_cset_dcq,
+                              G1ParPushHeapRSClosure* oops_in_heap_closure,
+                              uint worker_i) {
+  RefineRecordRefsIntoCSCardTableEntryClosure into_cset_update_rs_cl(_g1, into_cset_dcq, oops_in_heap_closure);
 
   G1GCParPhaseTimesTracker x(_g1p->phase_times(), G1GCPhaseTimes::UpdateRS, worker_i);
   if (ConcurrentG1Refine::hot_card_cache_enabled()) {
@@ -260,14 +349,9 @@
   HeapRegionRemSet::cleanup();
 }
 
-size_t G1RemSet::oops_into_collection_set_do(G1ParPushHeapRSClosure* oc,
+size_t G1RemSet::oops_into_collection_set_do(G1ParPushHeapRSClosure* cl,
                                              CodeBlobClosure* heap_region_codeblobs,
                                              uint worker_i) {
-  // We cache the value of 'oc' closure into the appropriate slot in the
-  // _cset_rs_update_cl for this worker
-  assert(worker_i < n_workers(), "sanity");
-  _cset_rs_update_cl[worker_i] = oc;
-
   // A DirtyCardQueue that is used to hold cards containing references
   // that point into the collection set. This DCQ is associated with a
   // special DirtyCardQueueSet (see g1CollectedHeap.hpp).  Under normal
@@ -279,18 +363,16 @@
   // DirtyCardQueueSet that is used to manage RSet updates
   DirtyCardQueue into_cset_dcq(&_into_cset_dirty_card_queue_set);
 
-  updateRS(&into_cset_dcq, worker_i);
-  size_t cards_scanned = scanRS(oc, heap_region_codeblobs, worker_i);
-
-  // We now clear the cached values of _cset_rs_update_cl for this worker
-  _cset_rs_update_cl[worker_i] = NULL;
-  return cards_scanned;
+  update_rem_set(&into_cset_dcq, cl, worker_i); // Flush remaining refinement buffers first.
+  return scan_rem_set(cl, heap_region_codeblobs, worker_i);
 }
 
 void G1RemSet::prepare_for_oops_into_collection_set_do() {
   _g1->set_refine_cte_cl_concurrency(false);
   DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
   dcqs.concatenate_logs();
+
+  _scan_state->reset();
 }
 
 void G1RemSet::cleanup_after_oops_into_collection_set_do() {
@@ -365,8 +447,9 @@
 // into the collection set, if we're checking for such references;
 // false otherwise.
 
-bool G1RemSet::refine_card(jbyte* card_ptr, uint worker_i,
-                           bool check_for_refs_into_cset) {
+bool G1RemSet::refine_card(jbyte* card_ptr,
+                           uint worker_i,
+                           G1ParPushHeapRSClosure*  oops_in_heap_closure) {
   assert(_g1->is_in_exact(_ct_bs->addr_for(card_ptr)),
          "Card at " PTR_FORMAT " index " SIZE_FORMAT " representing heap at " PTR_FORMAT " (%u) must be in committed heap",
          p2i(card_ptr),
@@ -374,6 +457,8 @@
          p2i(_ct_bs->addr_for(card_ptr)),
          _g1->addr_to_region(_ct_bs->addr_for(card_ptr)));
 
+  bool check_for_refs_into_cset = oops_in_heap_closure != NULL;
+
   // If the card is no longer dirty, nothing to do.
   if (*card_ptr != CardTableModRefBS::dirty_card_val()) {
     // No need to return that this card contains refs that point
@@ -450,15 +535,6 @@
   HeapWord* end   = start + CardTableModRefBS::card_size_in_words;
   MemRegion dirtyRegion(start, end);
 
-  G1ParPushHeapRSClosure* oops_in_heap_closure = NULL;
-  if (check_for_refs_into_cset) {
-    // ConcurrentG1RefineThreads have worker numbers larger than what
-    // _cset_rs_update_cl[] is set up to handle. But those threads should
-    // only be active outside of a collection which means that when they
-    // reach here they should have check_for_refs_into_cset == false.
-    assert((size_t)worker_i < n_workers(), "index of worker larger than _cset_rs_update_cl[].length");
-    oops_in_heap_closure = _cset_rs_update_cl[worker_i];
-  }
   G1UpdateRSOrPushRefOopClosure update_rs_oop_cl(_g1,
                                                  _g1->g1_rem_set(),
                                                  oops_in_heap_closure,
@@ -578,7 +654,7 @@
     hot_card_cache->set_use_cache(false);
 
     DirtyCardQueue into_cset_dcq(&_into_cset_dirty_card_queue_set);
-    updateRS(&into_cset_dcq, 0);
+    update_rem_set(&into_cset_dcq, NULL, 0);
     _into_cset_dirty_card_queue_set.clear();
 
     hot_card_cache->set_use_cache(use_hot_card_cache);
--- a/hotspot/src/share/vm/gc/g1/g1RemSet.hpp	Thu Apr 14 10:57:09 2016 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1RemSet.hpp	Mon Apr 18 16:51:14 2016 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -42,6 +42,7 @@
 class CodeBlobClosure;
 class G1CollectedHeap;
 class G1ParPushHeapRSClosure;
+class G1RemSetScanState;
 class G1Policy;
 class G1SATBCardTableModRefBS;
 class HeapRegionClaimer;
@@ -51,6 +52,7 @@
 // so that they can be used to update the individual region remsets.
 class G1RemSet: public CHeapObj<mtGC> {
 private:
+  G1RemSetScanState* _scan_state;
   G1CardLiveData _card_live_data;
 
   G1RemSetSummary _prev_period_summary;
@@ -72,10 +74,6 @@
 
   ConcurrentG1Refine*    _cg1r;
 
-  // Used for caching the closure that is responsible for scanning
-  // references into the collection set.
-  G1ParPushHeapRSClosure** _cset_rs_update_cl;
-
 public:
   // Gives an approximation on how many threads can be expected to add records to
   // a remembered set in parallel. This can be used for sizing data structures to
@@ -95,9 +93,9 @@
   G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs);
   ~G1RemSet();
 
-  // Invoke "blk->do_oop" on all pointers into the collection set
+  // Invoke "cl->do_oop" on all pointers into the collection set
   // from objects in regions outside the collection set (having
-  // invoked "blk->set_region" to set the "from" region correctly
+  // invoked "cl->set_region" to set the "from" region correctly
   // beforehand.)
   //
   // Apply non_heap_roots on the oops of the unmarked nmethods
@@ -112,7 +110,7 @@
   //
   // Returns the number of cards scanned while looking for pointers
   // into the collection set.
-  size_t oops_into_collection_set_do(G1ParPushHeapRSClosure* blk,
+  size_t oops_into_collection_set_do(G1ParPushHeapRSClosure* cl,
                                      CodeBlobClosure* heap_region_codeblobs,
                                      uint worker_i);
 
@@ -124,13 +122,15 @@
   void prepare_for_oops_into_collection_set_do();
   void cleanup_after_oops_into_collection_set_do();
 
-  size_t scanRS(G1ParPushHeapRSClosure* oc,
-                CodeBlobClosure* heap_region_codeblobs,
-                uint worker_i);
+  size_t scan_rem_set(G1ParPushHeapRSClosure* oops_in_heap_closure,
+                      CodeBlobClosure* heap_region_codeblobs,
+                      uint worker_i);
 
-  void updateRS(DirtyCardQueue* into_cset_dcq, uint worker_i);
+  G1RemSetScanState* scan_state() const { return _scan_state; }
 
-  CardTableModRefBS* ct_bs() { return _ct_bs; }
+  // Flush remaining refinement buffers into the remembered set,
+  // applying oops_in_heap_closure on the references found.
+  void update_rem_set(DirtyCardQueue* into_cset_dcq, G1ParPushHeapRSClosure* oops_in_heap_closure, uint worker_i);
 
   // Record, if necessary, the fact that *p (where "p" is in region "from",
   // which is required to be non-NULL) has changed to a new non-NULL value.
@@ -145,12 +145,12 @@
   void scrub(uint worker_num, HeapRegionClaimer* hrclaimer);
 
   // Refine the card corresponding to "card_ptr".
-  // If check_for_refs_into_cset is true, a true result is returned
+  // If oops_in_heap_closure is not NULL, a true result is returned
   // if the given card contains oops that have references into the
   // current collection set.
   virtual bool refine_card(jbyte* card_ptr,
                            uint worker_i,
-                           bool check_for_refs_into_cset);
+                           G1ParPushHeapRSClosure* oops_in_heap_closure);
 
   // Print accumulated summary info from the start of the VM.
   virtual void print_summary_info();
@@ -179,11 +179,14 @@
 #endif
 };
 
-class ScanRSClosure : public HeapRegionClosure {
-  size_t _cards_done, _cards;
+class G1ScanRSClosure : public HeapRegionClosure {
+  G1RemSetScanState* _scan_state;
+
+  size_t _cards_done;
+  size_t _cards;
   G1CollectedHeap* _g1h;
 
-  G1ParPushHeapRSClosure* _oc;
+  G1ParPushHeapRSClosure* _push_heap_cl;
   CodeBlobClosure* _code_root_cl;
 
   G1BlockOffsetTable* _bot;
@@ -192,26 +195,23 @@
   double _strong_code_root_scan_time_sec;
   uint   _worker_i;
   size_t _block_size;
-  bool   _try_claimed;
 
+  void scan_card(size_t index, HeapRegion *r);
+  void scan_strong_code_roots(HeapRegion* r);
 public:
-  ScanRSClosure(G1ParPushHeapRSClosure* oc,
-                CodeBlobClosure* code_root_cl,
-                uint worker_i);
+  G1ScanRSClosure(G1RemSetScanState* scan_state,
+                  G1ParPushHeapRSClosure* push_heap_cl,
+                  CodeBlobClosure* code_root_cl,
+                  uint worker_i);
 
   bool doHeapRegion(HeapRegion* r);
 
   double strong_code_root_scan_time_sec() {
     return _strong_code_root_scan_time_sec;
   }
+
   size_t cards_done() { return _cards_done;}
   size_t cards_looked_up() { return _cards;}
-  void set_try_claimed() { _try_claimed = true; }
-private:
-  void scanCard(size_t index, HeapRegion *r);
-  void printCard(HeapRegion* card_region, size_t card_index,
-                 HeapWord* card_start);
-  void scan_strong_code_roots(HeapRegion* r);
 };
 
 class UpdateRSOopClosure: public ExtendedOopClosure {
--- a/hotspot/src/share/vm/gc/g1/heapRegionRemSet.cpp	Thu Apr 14 10:57:09 2016 +0200
+++ b/hotspot/src/share/vm/gc/g1/heapRegionRemSet.cpp	Mon Apr 18 16:51:14 2016 +0200
@@ -692,8 +692,8 @@
                                    HeapRegion* hr)
   : _bot(bot),
     _m(Mutex::leaf, FormatBuffer<128>("HeapRegionRemSet lock #%u", hr->hrm_index()), true, Monitor::_safepoint_check_never),
-    _code_roots(), _other_regions(hr, &_m), _iter_state(Unclaimed), _iter_claimed(0) {
-  reset_for_par_iteration();
+    _code_roots(),
+    _other_regions(hr, &_m) {
 }
 
 void HeapRegionRemSet::setup_remset_size() {
@@ -710,20 +710,6 @@
   guarantee(G1RSetSparseRegionEntries > 0 && G1RSetRegionEntries > 0 , "Sanity");
 }
 
-bool HeapRegionRemSet::claim_iter() {
-  if (_iter_state != Unclaimed) return false;
-  jint res = Atomic::cmpxchg(Claimed, (jint*)(&_iter_state), Unclaimed);
-  return (res == Unclaimed);
-}
-
-void HeapRegionRemSet::set_iter_complete() {
-  _iter_state = Complete;
-}
-
-bool HeapRegionRemSet::iter_is_complete() {
-  return _iter_state == Complete;
-}
-
 #ifndef PRODUCT
 void HeapRegionRemSet::print() {
   HeapRegionRemSetIterator iter(this);
@@ -760,14 +746,6 @@
   _code_roots.clear();
   _other_regions.clear();
   assert(occupied_locked() == 0, "Should be clear.");
-  reset_for_par_iteration();
-}
-
-void HeapRegionRemSet::reset_for_par_iteration() {
-  _iter_state = Unclaimed;
-  _iter_claimed = 0;
-  // It's good to check this to make sure that the two methods are in sync.
-  assert(verify_ready_for_par_iteration(), "post-condition");
 }
 
 void HeapRegionRemSet::scrub(G1CardLiveData* live_data) {
--- a/hotspot/src/share/vm/gc/g1/heapRegionRemSet.hpp	Thu Apr 14 10:57:09 2016 +0200
+++ b/hotspot/src/share/vm/gc/g1/heapRegionRemSet.hpp	Mon Apr 18 16:51:14 2016 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -185,10 +185,6 @@
 
   OtherRegionsTable _other_regions;
 
-  enum ParIterState { Unclaimed, Claimed, Complete };
-  volatile ParIterState _iter_state;
-  volatile size_t _iter_claimed;
-
 public:
   HeapRegionRemSet(G1BlockOffsetTable* bot, HeapRegion* hr);
 
@@ -240,27 +236,6 @@
   void clear();
   void clear_locked();
 
-  // Attempt to claim the region.  Returns true iff this call caused an
-  // atomic transition from Unclaimed to Claimed.
-  bool claim_iter();
-  // Sets the iteration state to "complete".
-  void set_iter_complete();
-  // Returns "true" iff the region's iteration is complete.
-  bool iter_is_complete();
-
-  // Support for claiming blocks of cards during iteration
-  size_t iter_claimed() const { return _iter_claimed; }
-  // Claim the next block of cards
-  size_t iter_claimed_next(size_t step) {
-    return Atomic::add(step, &_iter_claimed) - step;
-  }
-
-  void reset_for_par_iteration();
-
-  bool verify_ready_for_par_iteration() {
-    return (_iter_state == Unclaimed) && (_iter_claimed == 0);
-  }
-
   // The actual # of bytes this hr_remset takes up.
   // Note also includes the strong code root set.
   size_t mem_size() {
--- a/hotspot/src/share/vm/gc/g1/heapRegionSet.cpp	Thu Apr 14 10:57:09 2016 +0200
+++ b/hotspot/src/share/vm/gc/g1/heapRegionSet.cpp	Mon Apr 18 16:51:14 2016 +0200
@@ -38,7 +38,6 @@
   assert(!hr->is_free() || hr->is_empty(), "Free region %u is not empty for set %s", hr->hrm_index(), name());
   assert(!hr->is_empty() || hr->is_free() || hr->is_archive(),
          "Empty region %u is not free or archive for set %s", hr->hrm_index(), name());
-  assert(hr->rem_set()->verify_ready_for_par_iteration(), "Wrong iteration state %u", hr->hrm_index());
 }
 #endif