hotspot/src/share/vm/gc/g1/g1RemSet.cpp
changeset 37988 bf4018edea5e
parent 37985 539c597ee0fa
child 38154 4c30333c304e
--- a/hotspot/src/share/vm/gc/g1/g1RemSet.cpp	Thu Apr 14 10:57:09 2016 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1RemSet.cpp	Mon Apr 18 16:51:14 2016 +0200
@@ -45,20 +45,108 @@
 #include "utilities/intHisto.hpp"
 #include "utilities/stack.inline.hpp"
 
+// Collects the remembered set scan progress of all regions (iteration state and claimed card position) during an evacuation.
+class G1RemSetScanState : public CHeapObj<mtGC> {
+private:
+  size_t _max_regions; // Number of elements in _iter_states and _iter_claims.
+
+  // Scan progress for the remembered set of a single region. Transitions from
+  // Unclaimed -> Claimed -> Complete.
+  // At each of the transitions the thread that does the transition needs to perform
+  // some special action once. This is the reason for the extra "Claimed" state.
+  typedef jint G1RemsetIterState;
+
+  static const G1RemsetIterState Unclaimed = 0; // The remembered set has not been scanned yet.
+  static const G1RemsetIterState Claimed = 1;   // The remembered set is currently being scanned.
+  static const G1RemsetIterState Complete = 2;  // The remembered set has been completely scanned.
+
+  G1RemsetIterState volatile* _iter_states; // One state per region, indexed by region index.
+  // The current location where the next thread should continue scanning in a region's
+  // remembered set.
+  size_t volatile* _iter_claims;
+
+public:
+  G1RemSetScanState() : // Both arrays stay NULL until initialize() is called.
+    _max_regions(0),
+    _iter_states(NULL),
+    _iter_claims(NULL) {
+
+  }
+
+  ~G1RemSetScanState() { // Frees the arrays if initialize() allocated them.
+    if (_iter_states != NULL) {
+      FREE_C_HEAP_ARRAY(G1RemsetIterState, _iter_states);
+    }
+    if (_iter_claims != NULL) {
+      FREE_C_HEAP_ARRAY(size_t, _iter_claims);
+    }
+  }
+
+  void initialize(uint max_regions) { // Allocates both arrays with max_regions elements; the elements are first written by reset().
+    assert(_iter_states == NULL, "Must not be initialized twice");
+    assert(_iter_claims == NULL, "Must not be initialized twice");
+    _max_regions = max_regions;
+    _iter_states = NEW_C_HEAP_ARRAY(G1RemsetIterState, max_regions, mtGC);
+    _iter_claims = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
+  }
+
+  void reset() { // Returns every region to Unclaimed with a claim position of zero.
+    for (uint i = 0; i < _max_regions; i++) {
+      _iter_states[i] = Unclaimed;
+    }
+    memset((void*)_iter_claims, 0, _max_regions * sizeof(size_t));
+  }
+
+  // Attempt to claim the remembered set of the region for iteration. Returns true
+  // if this call caused the transition from Unclaimed to Claimed.
+  inline bool claim_iter(uint region) {
+    assert(region < _max_regions, "Tried to access invalid region %u", region);
+    if (_iter_states[region] != Unclaimed) { // Plain read first: skip the cmpxchg if the region was already claimed or completed.
+      return false;
+    }
+    jint res = Atomic::cmpxchg(Claimed, (jint*)(&_iter_states[region]), Unclaimed);
+    return (res == Unclaimed); // Only the thread that saw Unclaimed as the previous value made the transition.
+  }
+
+  // Tries to atomically move the iteration state from Claimed to Complete. Returns true
+  // only for the thread that caused the transition.
+  inline bool set_iter_complete(uint region) {
+    if (iter_is_complete(region)) { // Also range-checks region through the assert in iter_is_complete().
+      return false;
+    }
+    jint res = Atomic::cmpxchg(Complete, (jint*)(&_iter_states[region]), Claimed);
+    return (res == Claimed);
+  }
+
+  // Returns true if the region's iteration is complete.
+  inline bool iter_is_complete(uint region) const {
+    assert(region < _max_regions, "Tried to access invalid region %u", region);
+    return _iter_states[region] == Complete;
+  }
+
+  // The current position within the remembered set of the given region.
+  inline size_t iter_claimed(uint region) const {
+    assert(region < _max_regions, "Tried to access invalid region %u", region);
+    return _iter_claims[region];
+  }
+
+  // Claims the next block of step cards within the remembered set of the region and
+  // returns the position at which the claimed block starts.
+  inline size_t iter_claimed_next(uint region, size_t step) { // NOTE(review): region is not range-checked here, unlike the other accessors.
+    return Atomic::add(step, &_iter_claims[region]) - step;
+  }
+};
+
 G1RemSet::G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs) :
   _g1(g1),
+  _scan_state(new G1RemSetScanState()),
   _conc_refine_cards(0),
   _ct_bs(ct_bs),
   _g1p(_g1->g1_policy()),
   _cg1r(g1->concurrent_g1_refine()),
-  _cset_rs_update_cl(NULL),
   _prev_period_summary(),
   _into_cset_dirty_card_queue_set(false)
 {
-  _cset_rs_update_cl = NEW_C_HEAP_ARRAY(G1ParPushHeapRSClosure*, n_workers(), mtGC);
-  for (uint i = 0; i < n_workers(); i++) {
-    _cset_rs_update_cl[i] = NULL;
-  }
   if (log_is_enabled(Trace, gc, remset)) {
     _prev_period_summary.initialize(this);
   }
@@ -74,10 +162,9 @@
 }
 
 G1RemSet::~G1RemSet() {
-  for (uint i = 0; i < n_workers(); i++) {
-    assert(_cset_rs_update_cl[i] == NULL, "it should be");
+  if (_scan_state != NULL) {
+    delete _scan_state; // Allocated with new in the G1RemSet constructor.
   }
-  FREE_C_HEAP_ARRAY(G1ParPushHeapRSClosure*, _cset_rs_update_cl);
 }
 
 uint G1RemSet::num_par_rem_sets() {
@@ -86,6 +173,7 @@
 
 void G1RemSet::initialize(size_t capacity, uint max_regions) {
   G1FromCardCache::initialize(num_par_rem_sets(), max_regions);
+  _scan_state->initialize(max_regions);
   {
     GCTraceTime(Debug, gc, marking)("Initialize Card Live Data");
     _card_live_data.initialize(capacity, max_regions);
@@ -96,29 +184,29 @@
   }
 }
 
-ScanRSClosure::ScanRSClosure(G1ParPushHeapRSClosure* oc,
-                             CodeBlobClosure* code_root_cl,
-                             uint worker_i) :
-  _oc(oc),
+G1ScanRSClosure::G1ScanRSClosure(G1RemSetScanState* scan_state,
+                                 G1ParPushHeapRSClosure* push_heap_cl,
+                                 CodeBlobClosure* code_root_cl,
+                                 uint worker_i) :
+  _scan_state(scan_state), // Per-region claim and completion state consulted by doHeapRegion().
+  _push_heap_cl(push_heap_cl), // Closure handed to the HeapRegionDCTOC in scan_card().
   _code_root_cl(code_root_cl),
   _strong_code_root_scan_time_sec(0.0),
   _cards(0),
   _cards_done(0),
-  _worker_i(worker_i),
-  _try_claimed(false) {
+  _worker_i(worker_i) {
   _g1h = G1CollectedHeap::heap();
   _bot = _g1h->bot();
   _ct_bs = _g1h->g1_barrier_set();
   _block_size = MAX2<size_t>(G1RSetScanBlockSize, 1);
 }
 
-void ScanRSClosure::scanCard(size_t index, HeapRegion *r) {
+void G1ScanRSClosure::scan_card(size_t index, HeapRegion *r) {
   // Stack allocate the DirtyCardToOopClosure instance
-  HeapRegionDCTOC cl(_g1h, r, _oc,
-      CardTableModRefBS::Precise);
+  HeapRegionDCTOC cl(_g1h, r, _push_heap_cl, CardTableModRefBS::Precise);
 
   // Set the "from" region in the closure.
-  _oc->set_region(r);
+  _push_heap_cl->set_region(r);
   MemRegion card_region(_bot->address_for_index(index), BOTConstants::N_words);
   MemRegion pre_gc_allocated(r->bottom(), r->scan_top());
   MemRegion mr = pre_gc_allocated.intersection(card_region);
@@ -132,37 +220,39 @@
   }
 }
 
-void ScanRSClosure::scan_strong_code_roots(HeapRegion* r) {
+void G1ScanRSClosure::scan_strong_code_roots(HeapRegion* r) { // Applies _code_root_cl to r's strong code roots and accumulates the elapsed time.
   double scan_start = os::elapsedTime();
   r->strong_code_roots_do(_code_root_cl);
   _strong_code_root_scan_time_sec += (os::elapsedTime() - scan_start);
 }
 
-bool ScanRSClosure::doHeapRegion(HeapRegion* r) {
+bool G1ScanRSClosure::doHeapRegion(HeapRegion* r) {
   assert(r->in_collection_set(), "should only be called on elements of CS.");
-  HeapRegionRemSet* hrrs = r->rem_set();
-  if (hrrs->iter_is_complete()) return false; // All done.
-  if (!_try_claimed && !hrrs->claim_iter()) return false;
-  // If we ever free the collection set concurrently, we should also
-  // clear the card table concurrently therefore we won't need to
-  // add regions of the collection set to the dirty cards region.
-  _g1h->push_dirty_cards_region(r);
-  // If we didn't return above, then
-  //   _try_claimed || r->claim_iter()
-  // is true: either we're supposed to work on claimed-but-not-complete
-  // regions, or we successfully claimed the region.
+  uint region_idx = r->hrm_index();
 
-  HeapRegionRemSetIterator iter(hrrs);
+  if (_scan_state->iter_is_complete(region_idx)) {
+    return false;
+  }
+  if (_scan_state->claim_iter(region_idx)) {
+    // If we ever free the collection set concurrently, we should also
+    // clear the card table concurrently therefore we won't need to
+    // add regions of the collection set to the dirty cards region.
+    _g1h->push_dirty_cards_region(r);
+  }
+
+  HeapRegionRemSetIterator iter(r->rem_set());
   size_t card_index;
 
   // We claim cards in block so as to reduce the contention. The block size is determined by
   // the G1RSetScanBlockSize parameter.
-  size_t jump_to_card = hrrs->iter_claimed_next(_block_size);
+  size_t claimed_card_block = _scan_state->iter_claimed_next(region_idx, _block_size);
   for (size_t current_card = 0; iter.has_next(card_index); current_card++) {
-    if (current_card >= jump_to_card + _block_size) {
-      jump_to_card = hrrs->iter_claimed_next(_block_size);
+    if (current_card >= claimed_card_block + _block_size) {
+      claimed_card_block = _scan_state->iter_claimed_next(region_idx, _block_size);
     }
-    if (current_card < jump_to_card) continue;
+    if (current_card < claimed_card_block) {
+      continue;
+    }
     HeapWord* card_start = _g1h->bot()->address_for_index(card_index);
 
     HeapRegion* card_region = _g1h->heap_region_containing(card_start);
@@ -175,38 +265,33 @@
     // If the card is dirty, then we will scan it during updateRS.
     if (!card_region->in_collection_set() &&
         !_ct_bs->is_card_dirty(card_index)) {
-      scanCard(card_index, card_region);
+      scan_card(card_index, card_region);
     }
   }
-  if (!_try_claimed) {
+  if (_scan_state->set_iter_complete(region_idx)) {
     // Scan the strong code root list attached to the current region
     scan_strong_code_roots(r);
-
-    hrrs->set_iter_complete();
   }
   return false;
 }
 
-size_t G1RemSet::scanRS(G1ParPushHeapRSClosure* oc,
-                        CodeBlobClosure* heap_region_codeblobs,
-                        uint worker_i) {
+size_t G1RemSet::scan_rem_set(G1ParPushHeapRSClosure* oops_in_heap_closure,
+                              CodeBlobClosure* heap_region_codeblobs,
+                              uint worker_i) { // Scans the collection set's remembered sets for this worker and returns the closure's cards_done() count.
   double rs_time_start = os::elapsedTime();
 
   HeapRegion *startRegion = _g1->start_cset_region_for_worker(worker_i);
 
-  ScanRSClosure scanRScl(oc, heap_region_codeblobs, worker_i);
+  G1ScanRSClosure cl(_scan_state, oops_in_heap_closure, heap_region_codeblobs, worker_i);
+  _g1->collection_set_iterate_from(startRegion, &cl); // Single pass over the collection set, beginning at this worker's start region.
 
-  _g1->collection_set_iterate_from(startRegion, &scanRScl);
-  scanRScl.set_try_claimed();
-  _g1->collection_set_iterate_from(startRegion, &scanRScl);
-
-  double scan_rs_time_sec = (os::elapsedTime() - rs_time_start)
-                            - scanRScl.strong_code_root_scan_time_sec();
+   double scan_rs_time_sec = (os::elapsedTime() - rs_time_start) -
+                              cl.strong_code_root_scan_time_sec(); // Code root scan time is excluded here and recorded under CodeRoots below.
 
   _g1p->phase_times()->record_time_secs(G1GCPhaseTimes::ScanRS, worker_i, scan_rs_time_sec);
-  _g1p->phase_times()->record_time_secs(G1GCPhaseTimes::CodeRoots, worker_i, scanRScl.strong_code_root_scan_time_sec());
+  _g1p->phase_times()->record_time_secs(G1GCPhaseTimes::CodeRoots, worker_i, cl.strong_code_root_scan_time_sec());
 
-  return scanRScl.cards_done();
+  return cl.cards_done();
 }
 
 // Closure used for updating RSets and recording references that
@@ -216,10 +301,12 @@
 class RefineRecordRefsIntoCSCardTableEntryClosure: public CardTableEntryClosure {
   G1RemSet* _g1rs;
   DirtyCardQueue* _into_cset_dcq;
+  G1ParPushHeapRSClosure* _cl;
 public:
   RefineRecordRefsIntoCSCardTableEntryClosure(G1CollectedHeap* g1h,
-                                              DirtyCardQueue* into_cset_dcq) :
-    _g1rs(g1h->g1_rem_set()), _into_cset_dcq(into_cset_dcq)
+                                              DirtyCardQueue* into_cset_dcq,
+                                              G1ParPushHeapRSClosure* cl) :
+    _g1rs(g1h->g1_rem_set()), _into_cset_dcq(into_cset_dcq), _cl(cl)
   {}
 
   bool do_card_ptr(jbyte* card_ptr, uint worker_i) {
@@ -230,7 +317,7 @@
     assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause");
     assert(worker_i < ParallelGCThreads, "should be a GC worker");
 
-    if (_g1rs->refine_card(card_ptr, worker_i, true)) {
+    if (_g1rs->refine_card(card_ptr, worker_i, _cl)) {
       // 'card_ptr' contains references that point into the collection
       // set. We need to record the card in the DCQS
       // (_into_cset_dirty_card_queue_set)
@@ -243,8 +330,10 @@
   }
 };
 
-void G1RemSet::updateRS(DirtyCardQueue* into_cset_dcq, uint worker_i) {
-  RefineRecordRefsIntoCSCardTableEntryClosure into_cset_update_rs_cl(_g1, into_cset_dcq);
+void G1RemSet::update_rem_set(DirtyCardQueue* into_cset_dcq,
+                              G1ParPushHeapRSClosure* oops_in_heap_closure,
+                              uint worker_i) {
+  RefineRecordRefsIntoCSCardTableEntryClosure into_cset_update_rs_cl(_g1, into_cset_dcq, oops_in_heap_closure);
 
   G1GCParPhaseTimesTracker x(_g1p->phase_times(), G1GCPhaseTimes::UpdateRS, worker_i);
   if (ConcurrentG1Refine::hot_card_cache_enabled()) {
@@ -260,14 +349,9 @@
   HeapRegionRemSet::cleanup();
 }
 
-size_t G1RemSet::oops_into_collection_set_do(G1ParPushHeapRSClosure* oc,
+size_t G1RemSet::oops_into_collection_set_do(G1ParPushHeapRSClosure* cl,
                                              CodeBlobClosure* heap_region_codeblobs,
                                              uint worker_i) {
-  // We cache the value of 'oc' closure into the appropriate slot in the
-  // _cset_rs_update_cl for this worker
-  assert(worker_i < n_workers(), "sanity");
-  _cset_rs_update_cl[worker_i] = oc;
-
   // A DirtyCardQueue that is used to hold cards containing references
   // that point into the collection set. This DCQ is associated with a
   // special DirtyCardQueueSet (see g1CollectedHeap.hpp).  Under normal
@@ -279,18 +363,16 @@
   // DirtyCardQueueSet that is used to manage RSet updates
   DirtyCardQueue into_cset_dcq(&_into_cset_dirty_card_queue_set);
 
-  updateRS(&into_cset_dcq, worker_i);
-  size_t cards_scanned = scanRS(oc, heap_region_codeblobs, worker_i);
-
-  // We now clear the cached values of _cset_rs_update_cl for this worker
-  _cset_rs_update_cl[worker_i] = NULL;
-  return cards_scanned;
+  update_rem_set(&into_cset_dcq, cl, worker_i);
+  return scan_rem_set(cl, heap_region_codeblobs, worker_i);;
 }
 
 void G1RemSet::prepare_for_oops_into_collection_set_do() {
   _g1->set_refine_cte_cl_concurrency(false);
   DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
   dcqs.concatenate_logs();
+
+  _scan_state->reset(); // Start the scan with every region Unclaimed and all claim positions at zero.
 }
 
 void G1RemSet::cleanup_after_oops_into_collection_set_do() {
@@ -365,8 +447,9 @@
 // into the collection set, if we're checking for such references;
 // false otherwise.
 
-bool G1RemSet::refine_card(jbyte* card_ptr, uint worker_i,
-                           bool check_for_refs_into_cset) {
+bool G1RemSet::refine_card(jbyte* card_ptr,
+                           uint worker_i,
+                           G1ParPushHeapRSClosure*  oops_in_heap_closure) {
   assert(_g1->is_in_exact(_ct_bs->addr_for(card_ptr)),
          "Card at " PTR_FORMAT " index " SIZE_FORMAT " representing heap at " PTR_FORMAT " (%u) must be in committed heap",
          p2i(card_ptr),
@@ -374,6 +457,8 @@
          p2i(_ct_bs->addr_for(card_ptr)),
          _g1->addr_to_region(_ct_bs->addr_for(card_ptr)));
 
+  bool check_for_refs_into_cset = oops_in_heap_closure != NULL;
+
   // If the card is no longer dirty, nothing to do.
   if (*card_ptr != CardTableModRefBS::dirty_card_val()) {
     // No need to return that this card contains refs that point
@@ -450,15 +535,6 @@
   HeapWord* end   = start + CardTableModRefBS::card_size_in_words;
   MemRegion dirtyRegion(start, end);
 
-  G1ParPushHeapRSClosure* oops_in_heap_closure = NULL;
-  if (check_for_refs_into_cset) {
-    // ConcurrentG1RefineThreads have worker numbers larger than what
-    // _cset_rs_update_cl[] is set up to handle. But those threads should
-    // only be active outside of a collection which means that when they
-    // reach here they should have check_for_refs_into_cset == false.
-    assert((size_t)worker_i < n_workers(), "index of worker larger than _cset_rs_update_cl[].length");
-    oops_in_heap_closure = _cset_rs_update_cl[worker_i];
-  }
   G1UpdateRSOrPushRefOopClosure update_rs_oop_cl(_g1,
                                                  _g1->g1_rem_set(),
                                                  oops_in_heap_closure,
@@ -578,7 +654,7 @@
     hot_card_cache->set_use_cache(false);
 
     DirtyCardQueue into_cset_dcq(&_into_cset_dirty_card_queue_set);
-    updateRS(&into_cset_dcq, 0);
+    update_rem_set(&into_cset_dcq, NULL, 0);
     _into_cset_dirty_card_queue_set.clear();
 
     hot_card_cache->set_use_cache(use_hot_card_cache);