6814437: G1: remove the _new_refs array
authorjohnc
Mon, 02 Aug 2010 12:51:43 -0700
changeset 6247 00e5cc407d03
parent 6246 2b94114ccaa4
child 6248 2e661807cef0
6814437: G1: remove the _new_refs array Summary: The per-worker _new_refs array is used to hold references that point into the collection set. It is populated during RSet updating and subsequently processed. In the event of an evacuation failure it is processed again to recreate the RSets of regions in the collection set. Remove the per-worker _new_refs array by processing the references directly. Use a DirtyCardQueue to hold the cards containing the references so that the RSets of regions in the collection set can be recreated when handling an evacuation failure. Reviewed-by: iveresov, jmasa, tonyp
hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp
hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp
hotspot/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp
hotspot/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp
hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp
hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp
hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp
hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp
hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp
hotspot/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp
hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp
hotspot/src/share/vm/gc_implementation/includeDB_gc_g1
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Sat Jul 31 15:10:59 2010 +0100
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Mon Aug 02 12:51:43 2010 -0700
@@ -339,7 +339,9 @@
   return res;
 }
 
-void ConcurrentG1Refine::clean_up_cache(int worker_i, G1RemSet* g1rs) {
+void ConcurrentG1Refine::clean_up_cache(int worker_i,
+                                        G1RemSet* g1rs,
+                                        DirtyCardQueue* into_cset_dcq) {
   assert(!use_cache(), "cache should be disabled");
   int start_idx;
 
@@ -353,7 +355,19 @@
       for (int i = start_idx; i < end_idx; i++) {
         jbyte* entry = _hot_cache[i];
         if (entry != NULL) {
-          g1rs->concurrentRefineOneCard(entry, worker_i);
+          if (g1rs->concurrentRefineOneCard(entry, worker_i, true)) {
+            // 'entry' contains references that point into the current
+            // collection set. We need to record 'entry' in the DCQS
+            // that's used for that purpose.
+            //
+            // The only time we care about recording cards that contain
+            // references that point into the collection set is during
+            // RSet updating while within an evacuation pause.
+            // In this case worker_i should be the id of a GC worker thread
+            assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause");
+            assert(worker_i < (int) DirtyCardQueueSet::num_par_ids(), "incorrect worker id");
+            into_cset_dcq->enqueue(entry);
+          }
         }
       }
     }
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Sat Jul 31 15:10:59 2010 +0100
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Mon Aug 02 12:51:43 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -184,7 +184,7 @@
   jbyte* cache_insert(jbyte* card_ptr, bool* defer);
 
   // Process the cached entries.
-  void clean_up_cache(int worker_i, G1RemSet* g1rs);
+  void clean_up_cache(int worker_i, G1RemSet* g1rs, DirtyCardQueue* into_cset_dcq);
 
   // Set up for parallel processing of the cards in the hot cache
   void clear_hot_cache_claimed_index() {
--- a/hotspot/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp	Sat Jul 31 15:10:59 2010 +0100
+++ b/hotspot/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp	Mon Aug 02 12:51:43 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -178,13 +178,14 @@
 }
 
 bool DirtyCardQueueSet::
-apply_closure_to_completed_buffer_helper(int worker_i,
+apply_closure_to_completed_buffer_helper(CardTableEntryClosure* cl,
+                                         int worker_i,
                                          BufferNode* nd) {
   if (nd != NULL) {
     void **buf = BufferNode::make_buffer_from_node(nd);
     size_t index = nd->index();
     bool b =
-      DirtyCardQueue::apply_closure_to_buffer(_closure, buf,
+      DirtyCardQueue::apply_closure_to_buffer(cl, buf,
                                               index, _sz,
                                               true, worker_i);
     if (b) {
@@ -199,15 +200,22 @@
   }
 }
 
+bool DirtyCardQueueSet::apply_closure_to_completed_buffer(CardTableEntryClosure* cl,
+                                                          int worker_i,
+                                                          int stop_at,
+                                                          bool during_pause) {
+  assert(!during_pause || stop_at == 0, "Should not leave any completed buffers during a pause");
+  BufferNode* nd = get_completed_buffer(stop_at);
+  bool res = apply_closure_to_completed_buffer_helper(cl, worker_i, nd);
+  if (res) Atomic::inc(&_processed_buffers_rs_thread);
+  return res;
+}
+
 bool DirtyCardQueueSet::apply_closure_to_completed_buffer(int worker_i,
                                                           int stop_at,
-                                                          bool during_pause)
-{
-  assert(!during_pause || stop_at == 0, "Should not leave any completed buffers during a pause");
-  BufferNode* nd = get_completed_buffer(stop_at);
-  bool res = apply_closure_to_completed_buffer_helper(worker_i, nd);
-  if (res) Atomic::inc(&_processed_buffers_rs_thread);
-  return res;
+                                                          bool during_pause) {
+  return apply_closure_to_completed_buffer(_closure, worker_i,
+                                           stop_at, during_pause);
 }
 
 void DirtyCardQueueSet::apply_closure_to_all_completed_buffers() {
@@ -222,8 +230,8 @@
   }
 }
 
-void DirtyCardQueueSet::abandon_logs() {
-  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
+// Deallocates any completed log buffers
+void DirtyCardQueueSet::clear() {
   BufferNode* buffers_to_delete = NULL;
   {
     MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
@@ -242,6 +250,12 @@
     buffers_to_delete = nd->next();
     deallocate_buffer(BufferNode::make_buffer_from_node(nd));
   }
+
+}
+
+void DirtyCardQueueSet::abandon_logs() {
+  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
+  clear();
   // Since abandon is done only at safepoints, we can safely manipulate
   // these queues.
   for (JavaThread* t = Threads::first(); t; t = t->next()) {
--- a/hotspot/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp	Sat Jul 31 15:10:59 2010 +0100
+++ b/hotspot/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp	Mon Aug 02 12:51:43 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -123,7 +123,21 @@
                                          int stop_at = 0,
                                          bool during_pause = false);
 
-  bool apply_closure_to_completed_buffer_helper(int worker_i,
+  // If there exists some completed buffer, pop it, then apply the
+  // specified closure to all its elements, nulling out those elements
+  // processed.  If all elements are processed, returns "true".  If no
+  // completed buffers exist, returns false.  If a completed buffer exists,
+  // but is only partially completed before a "yield" happens, the
+  // partially completed buffer (with its processed elements set to NULL)
+  // is returned to the completed buffer set, and this call returns false.
+  bool apply_closure_to_completed_buffer(CardTableEntryClosure* cl,
+                                         int worker_i = 0,
+                                         int stop_at = 0,
+                                         bool during_pause = false);
+
+  // Helper routine for the above.
+  bool apply_closure_to_completed_buffer_helper(CardTableEntryClosure* cl,
+                                                int worker_i,
                                                 BufferNode* nd);
 
   BufferNode* get_completed_buffer(int stop_at);
@@ -136,6 +150,9 @@
     return &_shared_dirty_card_queue;
   }
 
+  // Deallocate any completed log buffers
+  void clear();
+
   // If a full collection is happening, reset partial logs, and ignore
   // completed ones: the full collection will make them all irrelevant.
   void abandon_logs();
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Sat Jul 31 15:10:59 2010 +0100
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Mon Aug 02 12:51:43 2010 -0700
@@ -56,7 +56,12 @@
     _sts(sts), _g1rs(g1rs), _cg1r(cg1r), _concurrent(true)
   {}
   bool do_card_ptr(jbyte* card_ptr, int worker_i) {
-    _g1rs->concurrentRefineOneCard(card_ptr, worker_i);
+    bool oops_into_cset = _g1rs->concurrentRefineOneCard(card_ptr, worker_i, false);
+    // This path is executed by the concurrent refine or mutator threads,
+    // concurrently, and so we do not care if card_ptr contains references
+    // that point into the collection set.
+    assert(!oops_into_cset, "should be");
+
     if (_concurrent && _sts->should_yield()) {
       // Caller will actually yield.
       return false;
@@ -1322,6 +1327,7 @@
   SharedHeap(policy_),
   _g1_policy(policy_),
   _dirty_card_queue_set(false),
+  _into_cset_dirty_card_queue_set(false),
   _ref_processor(NULL),
   _process_strong_tasks(new SubTasksDone(G1H_PS_NumElements)),
   _bot_shared(NULL),
@@ -1572,6 +1578,16 @@
                                       Shared_DirtyCardQ_lock,
                                       &JavaThread::dirty_card_queue_set());
   }
+
+  // Initialize the card queue set used to hold cards containing
+  // references into the collection set.
+  _into_cset_dirty_card_queue_set.initialize(DirtyCardQ_CBL_mon,
+                                             DirtyCardQ_FL_lock,
+                                             -1, // never trigger processing
+                                             -1, // no limit on length
+                                             Shared_DirtyCardQ_lock,
+                                             &JavaThread::dirty_card_queue_set());
+
   // In case we're keeping closure specialization stats, initialize those
   // counts and that mechanism.
   SpecializationStats::clear();
@@ -1603,14 +1619,16 @@
   return _g1_committed.byte_size();
 }
 
-void G1CollectedHeap::iterate_dirty_card_closure(bool concurrent,
+void G1CollectedHeap::iterate_dirty_card_closure(CardTableEntryClosure* cl,
+                                                 DirtyCardQueue* into_cset_dcq,
+                                                 bool concurrent,
                                                  int worker_i) {
   // Clean cards in the hot card cache
-  concurrent_g1_refine()->clean_up_cache(worker_i, g1_rem_set());
+  concurrent_g1_refine()->clean_up_cache(worker_i, g1_rem_set(), into_cset_dcq);
 
   DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
   int n_completed_buffers = 0;
-  while (dcqs.apply_closure_to_completed_buffer(worker_i, 0, true)) {
+  while (dcqs.apply_closure_to_completed_buffer(cl, worker_i, 0, true)) {
     n_completed_buffers++;
   }
   g1_policy()->record_update_rs_processed_buffers(worker_i,
@@ -3346,25 +3364,6 @@
   }
 };
 
-class UpdateRSetImmediate : public OopsInHeapRegionClosure {
-private:
-  G1CollectedHeap* _g1;
-  G1RemSet* _g1_rem_set;
-public:
-  UpdateRSetImmediate(G1CollectedHeap* g1) :
-    _g1(g1), _g1_rem_set(g1->g1_rem_set()) {}
-
-  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
-  virtual void do_oop(      oop* p) { do_oop_work(p); }
-  template <class T> void do_oop_work(T* p) {
-    assert(_from->is_in_reserved(p), "paranoia");
-    T heap_oop = oopDesc::load_heap_oop(p);
-    if (!oopDesc::is_null(heap_oop) && !_from->is_survivor()) {
-      _g1_rem_set->par_write_ref(_from, p, 0);
-    }
-  }
-};
-
 class UpdateRSetDeferred : public OopsInHeapRegionClosure {
 private:
   G1CollectedHeap* _g1;
@@ -3389,8 +3388,6 @@
   }
 };
 
-
-
 class RemoveSelfPointerClosure: public ObjectClosure {
 private:
   G1CollectedHeap* _g1;
@@ -3453,7 +3450,7 @@
 };
 
 void G1CollectedHeap::remove_self_forwarding_pointers() {
-  UpdateRSetImmediate immediate_update(_g1h);
+  UpdateRSetImmediate immediate_update(_g1h->g1_rem_set());
   DirtyCardQueue dcq(&_g1h->dirty_card_queue_set());
   UpdateRSetDeferred deferred_update(_g1h, &dcq);
   OopsInHeapRegionClosure *cl;
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Sat Jul 31 15:10:59 2010 +0100
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Mon Aug 02 12:51:43 2010 -0700
@@ -505,6 +505,12 @@
   // A function to check the consistency of dirty card logs.
   void check_ct_logs_at_safepoint();
 
+  // A DirtyCardQueueSet that is used to hold cards that contain
+  // references into the current collection set. This is used to
+  // update the remembered sets of the regions in the collection
+  // set in the event of an evacuation failure.
+  DirtyCardQueueSet _into_cset_dirty_card_queue_set;
+
   // After a collection pause, make the regions in the CS into free
   // regions.
   void free_collection_set(HeapRegion* cs_head);
@@ -661,6 +667,13 @@
   // A set of cards where updates happened during the GC
   DirtyCardQueueSet& dirty_card_queue_set() { return _dirty_card_queue_set; }
 
+  // A DirtyCardQueueSet that is used to hold cards that contain
+  // references into the current collection set. This is used to
+  // update the remembered sets of the regions in the collection
+  // set in the event of an evacuation failure.
+  DirtyCardQueueSet& into_cset_dirty_card_queue_set()
+        { return _into_cset_dirty_card_queue_set; }
+
   // Create a G1CollectedHeap with the specified policy.
   // Must call the initialize method afterwards.
   // May not return if something goes wrong.
@@ -715,7 +728,9 @@
     OrderAccess::fence();
   }
 
-  void iterate_dirty_card_closure(bool concurrent, int worker_i);
+  void iterate_dirty_card_closure(CardTableEntryClosure* cl,
+                                  DirtyCardQueue* into_cset_dcq,
+                                  bool concurrent, int worker_i);
 
   // The shared block offset table array.
   G1BlockOffsetSharedArray* bot_shared() const { return _bot_shared; }
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Sat Jul 31 15:10:59 2010 +0100
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Mon Aug 02 12:51:43 2010 -0700
@@ -238,7 +238,6 @@
   _par_last_update_rs_processed_buffers = new double[_parallel_gc_threads];
 
   _par_last_scan_rs_times_ms = new double[_parallel_gc_threads];
-  _par_last_scan_new_refs_times_ms = new double[_parallel_gc_threads];
 
   _par_last_obj_copy_times_ms = new double[_parallel_gc_threads];
 
@@ -842,7 +841,6 @@
     _par_last_update_rs_times_ms[i] = -1234.0;
     _par_last_update_rs_processed_buffers[i] = -1234.0;
     _par_last_scan_rs_times_ms[i] = -1234.0;
-    _par_last_scan_new_refs_times_ms[i] = -1234.0;
     _par_last_obj_copy_times_ms[i] = -1234.0;
     _par_last_termination_times_ms[i] = -1234.0;
     _par_last_termination_attempts[i] = -1234.0;
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Sat Jul 31 15:10:59 2010 +0100
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Mon Aug 02 12:51:43 2010 -0700
@@ -63,8 +63,6 @@
     define_num_seq(mark_stack_scan)
     define_num_seq(update_rs)
     define_num_seq(scan_rs)
-    define_num_seq(scan_new_refs) // Only for temp use; added to
-                                  // in parallel case.
     define_num_seq(obj_copy)
     define_num_seq(termination) // parallel only
     define_num_seq(parallel_other) // parallel only
@@ -177,7 +175,6 @@
   double* _par_last_update_rs_times_ms;
   double* _par_last_update_rs_processed_buffers;
   double* _par_last_scan_rs_times_ms;
-  double* _par_last_scan_new_refs_times_ms;
   double* _par_last_obj_copy_times_ms;
   double* _par_last_termination_times_ms;
   double* _par_last_termination_attempts;
@@ -933,14 +930,6 @@
     _par_last_scan_rs_times_ms[thread] = ms;
   }
 
-  void record_scan_new_refs_time(int thread, double ms) {
-    _par_last_scan_new_refs_times_ms[thread] = ms;
-  }
-
-  double get_scan_new_refs_time(int thread) {
-    return _par_last_scan_new_refs_times_ms[thread];
-  }
-
   void reset_obj_copy_time(int thread) {
     _par_last_obj_copy_times_ms[thread] = 0.0;
   }
--- a/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp	Sat Jul 31 15:10:59 2010 +0100
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp	Mon Aug 02 12:51:43 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2007, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -37,7 +37,8 @@
       _g1->obj_in_cs(oopDesc::decode_heap_oop_not_null(heap_oop))) {
     _oc->do_oop(p);
 #if FILTERINTOCSCLOSURE_DOHISTOGRAMCOUNT
-    _dcto_cl->incr_count();
+    if (_dcto_cl != NULL)
+      _dcto_cl->incr_count();
 #endif
   }
 }
@@ -113,7 +114,10 @@
     if (_g1->in_cset_fast_test(obj)) {
       Prefetch::write(obj->mark_addr(), 0);
       Prefetch::read(obj->mark_addr(), (HeapWordSize*2));
+
+      // Place on the references queue
       _par_scan_state->push_on_queue(p);
     }
   }
 }
+
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Sat Jul 31 15:10:59 2010 +0100
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Mon Aug 02 12:51:43 2010 -0700
@@ -122,23 +122,24 @@
 HRInto_G1RemSet::HRInto_G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs)
   : G1RemSet(g1), _ct_bs(ct_bs), _g1p(_g1->g1_policy()),
     _cg1r(g1->concurrent_g1_refine()),
-    _par_traversal_in_progress(false), _new_refs(NULL),
+    _par_traversal_in_progress(false),
+    _cset_rs_update_cl(NULL),
     _cards_scanned(NULL), _total_cards_scanned(0)
 {
   _seq_task = new SubTasksDone(NumSeqTasks);
   guarantee(n_workers() > 0, "There should be some workers");
-  _new_refs = NEW_C_HEAP_ARRAY(GrowableArray<OopOrNarrowOopStar>*, n_workers());
+  _cset_rs_update_cl = NEW_C_HEAP_ARRAY(OopsInHeapRegionClosure*, n_workers());
   for (uint i = 0; i < n_workers(); i++) {
-    _new_refs[i] = new (ResourceObj::C_HEAP) GrowableArray<OopOrNarrowOopStar>(8192,true);
+    _cset_rs_update_cl[i] = NULL;
   }
 }
 
 HRInto_G1RemSet::~HRInto_G1RemSet() {
   delete _seq_task;
   for (uint i = 0; i < n_workers(); i++) {
-    delete _new_refs[i];
+    assert(_cset_rs_update_cl[i] == NULL, "it should be");
   }
-  FREE_C_HEAP_ARRAY(GrowableArray<OopOrNarrowOopStar>*, _new_refs);
+  FREE_C_HEAP_ARRAY(OopsInHeapRegionClosure*, _cset_rs_update_cl);
 }
 
 void CountNonCleanMemRegionClosure::do_MemRegion(MemRegion mr) {
@@ -306,12 +307,45 @@
   _g1p->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0);
 }
 
-void HRInto_G1RemSet::updateRS(int worker_i) {
-  ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine();
+// Closure used for updating RSets and recording references that
+// point into the collection set. Only called during an
+// evacuation pause.
+
+class RefineRecordRefsIntoCSCardTableEntryClosure: public CardTableEntryClosure {
+  G1RemSet* _g1rs;
+  DirtyCardQueue* _into_cset_dcq;
+public:
+  RefineRecordRefsIntoCSCardTableEntryClosure(G1CollectedHeap* g1h,
+                                              DirtyCardQueue* into_cset_dcq) :
+    _g1rs(g1h->g1_rem_set()), _into_cset_dcq(into_cset_dcq)
+  {}
+  bool do_card_ptr(jbyte* card_ptr, int worker_i) {
+    // The only time we care about recording cards that
+    // contain references that point into the collection set
+    // is during RSet updating within an evacuation pause.
+    // In this case worker_i should be the id of a GC worker thread.
+    assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause");
+    assert(worker_i < (int) DirtyCardQueueSet::num_par_ids(), "should be a GC worker");
 
+    if (_g1rs->concurrentRefineOneCard(card_ptr, worker_i, true)) {
+      // 'card_ptr' contains references that point into the collection
+      // set. We need to record the card in the DCQS
+      // (G1CollectedHeap::into_cset_dirty_card_queue_set())
+      // that's used for that purpose.
+      //
+      // Enqueue the card
+      _into_cset_dcq->enqueue(card_ptr);
+    }
+    return true;
+  }
+};
+
+void HRInto_G1RemSet::updateRS(DirtyCardQueue* into_cset_dcq, int worker_i) {
   double start = os::elapsedTime();
-  // Apply the appropriate closure to all remaining log entries.
-  _g1->iterate_dirty_card_closure(false, worker_i);
+  // Apply the given closure to all remaining log entries.
+  RefineRecordRefsIntoCSCardTableEntryClosure into_cset_update_rs_cl(_g1, into_cset_dcq);
+  _g1->iterate_dirty_card_closure(&into_cset_update_rs_cl, into_cset_dcq, false, worker_i);
+
   // Now there should be no dirty cards.
   if (G1RSLogCheckCardTable) {
     CountNonCleanMemRegionClosure cl(_g1);
@@ -405,33 +439,6 @@
   }
 };
 
-template <class T> void
-HRInto_G1RemSet::scanNewRefsRS_work(OopsInHeapRegionClosure* oc,
-                                    int worker_i) {
-  double scan_new_refs_start_sec = os::elapsedTime();
-  G1CollectedHeap* g1h = G1CollectedHeap::heap();
-  CardTableModRefBS* ct_bs = (CardTableModRefBS*) (g1h->barrier_set());
-  for (int i = 0; i < _new_refs[worker_i]->length(); i++) {
-    T* p = (T*) _new_refs[worker_i]->at(i);
-    oop obj = oopDesc::load_decode_heap_oop(p);
-    // *p was in the collection set when p was pushed on "_new_refs", but
-    // another thread may have processed this location from an RS, so it
-    // might not point into the CS any longer.  If so, it's obviously been
-    // processed, and we don't need to do anything further.
-    if (g1h->obj_in_cs(obj)) {
-      HeapRegion* r = g1h->heap_region_containing(p);
-
-      DEBUG_ONLY(HeapRegion* to = g1h->heap_region_containing(obj));
-      oc->set_region(r);
-      // If "p" has already been processed concurrently, this is
-      // idempotent.
-      oc->do_oop(p);
-    }
-  }
-  double scan_new_refs_time_ms = (os::elapsedTime() - scan_new_refs_start_sec) * 1000.0;
-  _g1p->record_scan_new_refs_time(worker_i, scan_new_refs_time_ms);
-}
-
 void HRInto_G1RemSet::cleanupHRRS() {
   HeapRegionRemSet::cleanup();
 }
@@ -457,6 +464,26 @@
     count_cl.print_histo();
   }
 
+  // We cache the value of 'oc' closure into the appropriate slot in the
+  // _cset_rs_update_cl for this worker
+  assert(worker_i < (int)n_workers(), "sanity");
+  _cset_rs_update_cl[worker_i] = oc;
+
+  // A DirtyCardQueue that is used to hold cards containing references
+  // that point into the collection set. This DCQ is associated with a
+  // special DirtyCardQueueSet (see g1CollectedHeap.hpp).  Under normal
+  // circumstances (i.e. the pause successfully completes), these cards
+  // are just discarded (there's no need to update the RSets of regions
+  // that were in the collection set - after the pause these regions
+  // are wholly 'free' of live objects). In the event of an evacuation
+  // failure the cards/buffers in this queue set are:
+  // * passed to the DirtyCardQueueSet that is used to manage deferred
+  //   RSet updates, or
+  // * scanned for references that point into the collection set
+  //   and the RSet of the corresponding region in the collection set
+  //   is updated immediately.
+  DirtyCardQueue into_cset_dcq(&_g1->into_cset_dirty_card_queue_set());
+
   if (ParallelGCThreads > 0) {
     // The two flags below were introduced temporarily to serialize
     // the updating and scanning of remembered sets. There are some
@@ -465,12 +492,10 @@
     // conditions, we'll revert back to parallel remembered set
     // updating and scanning. See CRs 6677707 and 6677708.
     if (G1UseParallelRSetUpdating || (worker_i == 0)) {
-      updateRS(worker_i);
-      scanNewRefsRS(oc, worker_i);
+      updateRS(&into_cset_dcq, worker_i);
     } else {
       _g1p->record_update_rs_processed_buffers(worker_i, 0.0);
       _g1p->record_update_rs_time(worker_i, 0.0);
-      _g1p->record_scan_new_refs_time(worker_i, 0.0);
     }
     if (G1UseParallelRSetScanning || (worker_i == 0)) {
       scanRS(oc, worker_i);
@@ -479,10 +504,12 @@
     }
   } else {
     assert(worker_i == 0, "invariant");
-    updateRS(0);
-    scanNewRefsRS(oc, 0);
+    updateRS(&into_cset_dcq, 0);
     scanRS(oc, 0);
   }
+
+  // We now clear the cached values of _cset_rs_update_cl for this worker
+  _cset_rs_update_cl[worker_i] = NULL;
 }
 
 void HRInto_G1RemSet::
@@ -519,49 +546,65 @@
   }
 };
 
-class UpdateRSetOopsIntoCSImmediate : public OopClosure {
-  G1CollectedHeap* _g1;
-public:
-  UpdateRSetOopsIntoCSImmediate(G1CollectedHeap* g1) : _g1(g1) { }
-  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
-  virtual void do_oop(      oop* p) { do_oop_work(p); }
-  template <class T> void do_oop_work(T* p) {
-    HeapRegion* to = _g1->heap_region_containing(oopDesc::load_decode_heap_oop(p));
-    if (to->in_collection_set()) {
-      to->rem_set()->add_reference(p, 0);
-    }
-  }
-};
-
-class UpdateRSetOopsIntoCSDeferred : public OopClosure {
+// This closure, applied to a DirtyCardQueueSet, is used to immediately
+// update the RSets for the regions in the CSet. For each card it iterates
+// through the oops which coincide with that card. It scans the reference
+// fields in each oop; when it finds an oop that points into the collection
+// set, the RSet for the region containing the referenced object is updated.
+// Note: _par_traversal_in_progress in the G1RemSet must be FALSE; otherwise
+// the UpdateRSetImmediate closure will cause cards to be enqueued on to
+// the DCQS that we're iterating over, causing an infinite loop.
+class UpdateRSetCardTableEntryIntoCSetClosure: public CardTableEntryClosure {
   G1CollectedHeap* _g1;
   CardTableModRefBS* _ct_bs;
-  DirtyCardQueue* _dcq;
 public:
-  UpdateRSetOopsIntoCSDeferred(G1CollectedHeap* g1, DirtyCardQueue* dcq) :
-    _g1(g1), _ct_bs((CardTableModRefBS*)_g1->barrier_set()), _dcq(dcq) { }
-  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
-  virtual void do_oop(      oop* p) { do_oop_work(p); }
-  template <class T> void do_oop_work(T* p) {
-    oop obj = oopDesc::load_decode_heap_oop(p);
-    if (_g1->obj_in_cs(obj)) {
-      size_t card_index = _ct_bs->index_for(p);
-      if (_ct_bs->mark_card_deferred(card_index)) {
-        _dcq->enqueue((jbyte*)_ct_bs->byte_for_index(card_index));
-      }
-    }
+  UpdateRSetCardTableEntryIntoCSetClosure(G1CollectedHeap* g1,
+                                          CardTableModRefBS* bs):
+    _g1(g1), _ct_bs(bs)
+  { }
+
+  bool do_card_ptr(jbyte* card_ptr, int worker_i) {
+    // Construct the region representing the card.
+    HeapWord* start = _ct_bs->addr_for(card_ptr);
+    // And find the region containing it.
+    HeapRegion* r = _g1->heap_region_containing(start);
+    assert(r != NULL, "unexpected null");
+
+    // Scan oops in the card looking for references into the collection set
+    HeapWord* end   = _ct_bs->addr_for(card_ptr + 1);
+    MemRegion scanRegion(start, end);
+
+    UpdateRSetImmediate update_rs_cl(_g1->g1_rem_set());
+    FilterIntoCSClosure update_rs_cset_oop_cl(NULL, _g1, &update_rs_cl);
+    FilterOutOfRegionClosure filter_then_update_rs_cset_oop_cl(r, &update_rs_cset_oop_cl);
+
+    // We can pass false as the "filter_young" parameter here as:
+    // * we should be in a STW pause,
+    // * the DCQS to which this closure is applied is used to hold
+    //   references that point into the collection set from the prior
+    //   RSet updating,
+    // * the post-write barrier shouldn't be logging updates to young
+    //   regions (but there is a situation where this can happen - see
+    //   the comment in HRInto_G1RemSet::concurrentRefineOneCard below -
+    //   that should not be applicable here), and
+    // * during actual RSet updating, the filtering of cards in young
+    //   regions in HeapRegion::oops_on_card_seq_iterate_careful is
+    //   employed.
+    // As a result, when this closure is applied to "refs into cset"
+    // DCQS, we shouldn't see any cards in young regions.
+    update_rs_cl.set_region(r);
+    HeapWord* stop_point =
+      r->oops_on_card_seq_iterate_careful(scanRegion,
+                                        &filter_then_update_rs_cset_oop_cl,
+                                        false /* filter_young */);
+
+    // Since this is performed in the event of an evacuation failure, we
+    // shouldn't see a non-null stop point
+    assert(stop_point == NULL, "saw an unallocated region");
+    return true;
   }
 };
 
-template <class T> void HRInto_G1RemSet::new_refs_iterate_work(OopClosure* cl) {
-  for (size_t i = 0; i < n_workers(); i++) {
-    for (int j = 0; j < _new_refs[i]->length(); j++) {
-      T* p = (T*) _new_refs[i]->at(j);
-      cl->do_oop(p);
-    }
-  }
-}
-
 void HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do() {
   guarantee( _cards_scanned != NULL, "invariant" );
   _total_cards_scanned = 0;
@@ -584,21 +627,38 @@
     set_par_traversal(false);
   }
 
+  DirtyCardQueueSet& into_cset_dcqs = _g1->into_cset_dirty_card_queue_set();
+  int into_cset_n_buffers = into_cset_dcqs.completed_buffers_num();
+
   if (_g1->evacuation_failed()) {
-    // Restore remembered sets for the regions pointing into
-    // the collection set.
+    // Restore remembered sets for the regions pointing into the collection set.
+
     if (G1DeferredRSUpdate) {
-      DirtyCardQueue dcq(&_g1->dirty_card_queue_set());
-      UpdateRSetOopsIntoCSDeferred deferred_update(_g1, &dcq);
-      new_refs_iterate(&deferred_update);
+      // If deferred RS updates are enabled then we just need to transfer
+      // the completed buffers from (a) the DirtyCardQueueSet used to hold
+      // cards that contain references that point into the collection set
+      // to (b) the DCQS used to hold the deferred RS updates
+      _g1->dirty_card_queue_set().merge_bufferlists(&into_cset_dcqs);
     } else {
-      UpdateRSetOopsIntoCSImmediate immediate_update(_g1);
-      new_refs_iterate(&immediate_update);
+
+      CardTableModRefBS* bs = (CardTableModRefBS*)_g1->barrier_set();
+      UpdateRSetCardTableEntryIntoCSetClosure update_rs_cset_immediate(_g1, bs);
+
+      int n_completed_buffers = 0;
+      while (into_cset_dcqs.apply_closure_to_completed_buffer(&update_rs_cset_immediate,
+                                                    0, 0, true)) {
+        n_completed_buffers++;
+      }
+      assert(n_completed_buffers == into_cset_n_buffers, "missed some buffers");
     }
   }
-  for (uint i = 0; i < n_workers(); i++) {
-    _new_refs[i]->clear();
-  }
+
+  // Free any completed buffers in the DirtyCardQueueSet used to hold cards
+  // which contain references that point into the collection set.
+  _g1->into_cset_dirty_card_queue_set().clear();
+  assert(_g1->into_cset_dirty_card_queue_set().completed_buffers_num() == 0,
+         "all buffers should be freed");
+  _g1->into_cset_dirty_card_queue_set().clear_n_completed_buffers();
 
   assert(!_par_traversal_in_progress, "Invariant between iterations.");
 }
@@ -652,7 +712,43 @@
 
 static IntHistogram out_of_histo(50, 50);
 
-void HRInto_G1RemSet::concurrentRefineOneCard_impl(jbyte* card_ptr, int worker_i) {
+class TriggerClosure : public OopClosure {  // Latch: records whether any oop location was visited.
+  bool _trigger;  // false until the first do_oop call; this class never resets it
+public:
+  TriggerClosure() : _trigger(false) { }
+  bool value() const { return _trigger; }  // true once any do_oop/do_oop_nv has run
+  template <class T> void do_oop_nv(T* p) { _trigger = true; }  // 'p' itself is not examined
+  virtual void do_oop(oop* p)        { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p)  { do_oop_nv(p); }
+};
+
+class InvokeIfNotTriggeredClosure: public OopClosure {  // Forwards oop locations to _oc only while _t is still unset.
+  TriggerClosure* _t;  // latch consulted before every forward
+  OopClosure* _oc;  // closure that receives 'p' while the latch reads false
+public:
+  InvokeIfNotTriggeredClosure(TriggerClosure* t, OopClosure* oc):
+    _t(t), _oc(oc) { }
+  template <class T> void do_oop_nv(T* p) {
+    if (!_t->value()) _oc->do_oop(p);  // once _t->value() is true, 'p' is dropped without calling _oc
+  }
+  virtual void do_oop(oop* p)        { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p)  { do_oop_nv(p); }
+};
+
+class Mux2Closure : public OopClosure {  // Fans each oop location out to two closures.
+  OopClosure* _c1;  // applied first
+  OopClosure* _c2;  // applied second
+public:
+  Mux2Closure(OopClosure *c1, OopClosure *c2) : _c1(c1), _c2(c2) { }
+  template <class T> void do_oop_nv(T* p) {
+    _c1->do_oop(p); _c2->do_oop(p);  // both are always called; _c1 sees 'p' before _c2
+  }
+  virtual void do_oop(oop* p)        { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p)  { do_oop_nv(p); }
+};
+
+bool HRInto_G1RemSet::concurrentRefineOneCard_impl(jbyte* card_ptr, int worker_i,
+                                                   bool check_for_refs_into_cset) {
   // Construct the region representing the card.
   HeapWord* start = _ct_bs->addr_for(card_ptr);
   // And find the region containing it.
@@ -669,7 +765,16 @@
 
   UpdateRSOopClosure update_rs_oop_cl(this, worker_i);
   update_rs_oop_cl.set_from(r);
-  FilterOutOfRegionClosure filter_then_update_rs_oop_cl(r, &update_rs_oop_cl);
+
+  TriggerClosure trigger_cl;
+  FilterIntoCSClosure into_cs_cl(NULL, _g1, &trigger_cl);
+  InvokeIfNotTriggeredClosure invoke_cl(&trigger_cl, &into_cs_cl);
+  Mux2Closure mux(&invoke_cl, &update_rs_oop_cl);
+
+  FilterOutOfRegionClosure filter_then_update_rs_oop_cl(r,
+                        (check_for_refs_into_cset ?
+                                (OopClosure*)&mux :
+                                (OopClosure*)&update_rs_oop_cl));
 
   // Undirty the card.
   *card_ptr = CardTableModRefBS::clean_card_val();
@@ -717,11 +822,18 @@
     out_of_histo.add_entry(filter_then_update_rs_oop_cl.out_of_region());
     _conc_refine_cards++;
   }
+
+  return trigger_cl.value();
 }
 
-void HRInto_G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i) {
+bool HRInto_G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i,
+                                              bool check_for_refs_into_cset) {
   // If the card is no longer dirty, nothing to do.
-  if (*card_ptr != CardTableModRefBS::dirty_card_val()) return;
+  if (*card_ptr != CardTableModRefBS::dirty_card_val()) {
+    // No need to return that this card contains refs that point
+    // into the collection set.
+    return false;
+  }
 
   // Construct the region representing the card.
   HeapWord* start = _ct_bs->addr_for(card_ptr);
@@ -729,7 +841,9 @@
   HeapRegion* r = _g1->heap_region_containing(start);
   if (r == NULL) {
     guarantee(_g1->is_in_permanent(start), "Or else where?");
-    return;  // Not in the G1 heap (might be in perm, for example.)
+    // Again no need to return that this card contains refs that
+    // point into the collection set.
+    return false;  // Not in the G1 heap (might be in perm, for example.)
   }
   // Why do we have to check here whether a card is on a young region,
   // given that we dirty young regions and, as a result, the
@@ -743,7 +857,7 @@
   // and it doesn't happen often, but it can happen. So, the extra
   // check below filters out those cards.
   if (r->is_young()) {
-    return;
+    return false;
   }
   // While we are processing RSet buffers during the collection, we
   // actually don't want to scan any cards on the collection set,
@@ -756,7 +870,7 @@
   // however, that if evacuation fails, we have to scan any objects
   // that were not moved and create any missing entries.
   if (r->in_collection_set()) {
-    return;
+    return false;
   }
 
   // Should we defer processing the card?
@@ -797,8 +911,14 @@
   //                  cache.
   //                  Immediately process res; no need to process card_ptr.
 
+
   jbyte* res = card_ptr;
   bool defer = false;
+
+  // This gets set to true if the card being refined has references
+  // that point into the collection set.
+  bool oops_into_cset = false;
+
   if (_cg1r->use_cache()) {
     jbyte* res = _cg1r->cache_insert(card_ptr, &defer);
     if (res != NULL && (res != card_ptr || defer)) {
@@ -815,14 +935,31 @@
         // Process card pointer we get back from the hot card cache. This
         // will check whether the region containing the card is young
         // _after_ checking that the region has been allocated from.
-        concurrentRefineOneCard_impl(res, worker_i);
+        oops_into_cset = concurrentRefineOneCard_impl(res, worker_i,
+                                                      false /* check_for_refs_into_cset */);
+        // The above call to concurrentRefineOneCard_impl is only
+        // performed if the hot card cache is enabled. This cache is
+        // disabled during an evacuation pause - which is the only
+        // time when we need to know if the card contains references
+        // that point into the collection set. Also when the hot card
+        // cache is enabled, this code is executed by the concurrent
+        // refine threads - rather than the GC worker threads - and
+        // concurrentRefineOneCard_impl will return false.
+        assert(!oops_into_cset, "should not see true here");
       }
     }
   }
 
   if (!defer) {
-    concurrentRefineOneCard_impl(card_ptr, worker_i);
+    oops_into_cset =
+      concurrentRefineOneCard_impl(card_ptr, worker_i, check_for_refs_into_cset);
+    // We should only be detecting that the card contains references
+    // that point into the collection set if the current thread is
+    // a GC worker thread.
+    assert(!oops_into_cset || SafepointSynchronize::is_at_safepoint(),
+           "invalid result at non safepoint");
   }
+  return oops_into_cset;
 }
 
 class HRRSStatsIter: public HeapRegionClosure {
@@ -920,6 +1057,7 @@
 
   }
 }
+
 void HRInto_G1RemSet::prepare_for_verify() {
   if (G1HRRSFlushLogBuffersOnVerify &&
       (VerifyBeforeGC || VerifyAfterGC)
@@ -932,7 +1070,9 @@
     }
     bool cg1r_use_cache = _cg1r->use_cache();
     _cg1r->set_use_cache(false);
-    updateRS(0);
+    DirtyCardQueue into_cset_dcq(&_g1->into_cset_dirty_card_queue_set());
+    updateRS(&into_cset_dcq, 0);
+    _g1->into_cset_dirty_card_queue_set().clear();
     _cg1r->set_use_cache(cg1r_use_cache);
 
     assert(JavaThread::dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed");
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Sat Jul 31 15:10:59 2010 +0100
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Mon Aug 02 12:51:43 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -83,7 +83,13 @@
   // Refine the card corresponding to "card_ptr".  If "sts" is non-NULL,
   // join and leave around parts that must be atomic wrt GC.  (NULL means
   // being done at a safepoint.)
-  virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i) {}
+  // With some implementations of this routine, when check_for_refs_into_cset
+  // is true, a true result may be returned if the given card contains oops
+  // that have references into the current collection set.
+  virtual bool concurrentRefineOneCard(jbyte* card_ptr, int worker_i,
+                                       bool check_for_refs_into_cset) {
+    return false;
+  }
 
   // Print any relevant summary info.
   virtual void print_summary_info() {}
@@ -143,23 +149,21 @@
   size_t              _total_cards_scanned;
 
   // _par_traversal_in_progress is "true" iff a parallel traversal is in
-  // progress.  If so, then cards added to remembered sets should also have
-  // their references into the collection summarized in "_new_refs".
+  // progress.
   bool _par_traversal_in_progress;
   void set_par_traversal(bool b) { _par_traversal_in_progress = b; }
-  GrowableArray<OopOrNarrowOopStar>** _new_refs;
-  template <class T> void new_refs_iterate_work(OopClosure* cl);
-  void new_refs_iterate(OopClosure* cl) {
-    if (UseCompressedOops) {
-      new_refs_iterate_work<narrowOop>(cl);
-    } else {
-      new_refs_iterate_work<oop>(cl);
-    }
-  }
+
+  // Used for caching the closure that is responsible for scanning
+  // references into the collection set.
+  OopsInHeapRegionClosure** _cset_rs_update_cl;
 
   // The routine that performs the actual work of refining a dirty
   // card.
-  void concurrentRefineOneCard_impl(jbyte* card_ptr, int worker_i);
+  // If check_for_refs_into_cset is true then a true result is returned
+  // if the card contains oops that have references into the current
+  // collection set.
+  bool concurrentRefineOneCard_impl(jbyte* card_ptr, int worker_i,
+                                    bool check_for_refs_into_cset);
 
 protected:
   template <class T> void write_ref_nv(HeapRegion* from, T* p);
@@ -188,7 +192,7 @@
       scanNewRefsRS_work<oop>(oc, worker_i);
     }
   }
-  void updateRS(int worker_i);
+  void updateRS(DirtyCardQueue* into_cset_dcq, int worker_i);
   HeapRegion* calculateStartRegion(int i);
 
   HRInto_G1RemSet* as_HRInto_G1RemSet() { return this; }
@@ -219,7 +223,11 @@
   void scrub_par(BitMap* region_bm, BitMap* card_bm,
                  int worker_num, int claim_val);
 
-  virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i);
+  // If check_for_refs_into_cset is true then a true result is returned
+  // if the card contains oops that have references into the current
+  // collection set.
+  virtual bool concurrentRefineOneCard(jbyte* card_ptr, int worker_i,
+                                       bool check_for_refs_into_cset);
 
   virtual void print_summary_info();
   virtual void prepare_for_verify();
@@ -265,3 +273,16 @@
   //  bool idempotent() { return true; }
   bool apply_to_weak_ref_discovered_field() { return true; }
 };
+
+class UpdateRSetImmediate: public OopsInHeapRegionClosure {  // Oop closure whose do_oop overloads both delegate to do_oop_work.
+private:
+  G1RemSet* _g1_rem_set;  // stored by the constructor; the remembered set do_oop_work writes through
+
+  template <class T> void do_oop_work(T* p);  // shared implementation for oop* and narrowOop*; defined outside this class
+public:
+  UpdateRSetImmediate(G1RemSet* rs) :
+    _g1_rem_set(rs) {}
+
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(      oop* p) { do_oop_work(p); }
+};
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp	Sat Jul 31 15:10:59 2010 +0100
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp	Mon Aug 02 12:51:43 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -56,19 +56,25 @@
     assert(Universe::heap()->is_in_reserved(obj), "must be in heap");
   }
 #endif // ASSERT
-  assert(from == NULL || from->is_in_reserved(p),
-         "p is not in from");
+
+  assert(from == NULL || from->is_in_reserved(p), "p is not in from");
+
   HeapRegion* to = _g1->heap_region_containing(obj);
   // The test below could be optimized by applying a bit op to to and from.
   if (to != NULL && from != NULL && from != to) {
-    // There is a tricky infinite loop if we keep pushing
-    // self forwarding pointers onto our _new_refs list.
     // The _par_traversal_in_progress flag is true during the collection pause,
-    // false during the evacuation failure handing.
+    // false during the evacuation failure handling. This should avoid a
+    // potential loop if we were to add the card containing 'p' to the DCQS
+    // that's used to regenerate the remembered sets for the collection set,
+    // in the event of an evacuation failure, here. The UpdateRSetImmediate
+    // closure will eventually call this routine.
     if (_par_traversal_in_progress &&
         to->in_collection_set() && !self_forwarded(obj)) {
-      _new_refs[tid]->push((void*)p);
-      // Deferred updates to the Cset are either discarded (in the normal case),
+
+      assert(_cset_rs_update_cl[tid] != NULL, "should have been set already");
+      _cset_rs_update_cl[tid]->do_oop(p);
+
+      // Deferred updates to the CSet are either discarded (in the normal case),
       // or processed (if an evacuation failure occurs) at the end
       // of the collection.
       // See HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do().
@@ -89,3 +95,12 @@
   assert(_from != NULL, "from region must be non-NULL");
   _rs->par_write_ref(_from, p, _worker_i);
 }
+
+template <class T> inline void UpdateRSetImmediate::do_oop_work(T* p) {  // Passes 'p' to par_write_ref unless filtered out below.
+  assert(_from->is_in_reserved(p), "paranoia");
+  T heap_oop = oopDesc::load_heap_oop(p);
+  if (!oopDesc::is_null(heap_oop) && !_from->is_survivor()) {  // skip null references and the case where _from is a survivor region
+    _g1_rem_set->par_write_ref(_from, p, 0);  // last argument is hard-coded to 0 (presumably the worker/thread id -- confirm)
+  }
+}
+
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp	Sat Jul 31 15:10:59 2010 +0100
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp	Mon Aug 02 12:51:43 2010 -0700
@@ -683,6 +683,8 @@
     return NULL;
   }
 
+  assert(!is_young(), "check value of filter_young");
+
   // We used to use "block_start_careful" here.  But we're actually happy
   // to update the BOT while we do this...
   HeapWord* cur = block_start(mr.start());
--- a/hotspot/src/share/vm/gc_implementation/includeDB_gc_g1	Sat Jul 31 15:10:59 2010 +0100
+++ b/hotspot/src/share/vm/gc_implementation/includeDB_gc_g1	Mon Aug 02 12:51:43 2010 -0700
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2004, 2009, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -241,6 +241,7 @@
 
 g1MMUTracker.hpp			debug.hpp
 g1MMUTracker.hpp			allocation.hpp
+
 g1RemSet.cpp				bufferingOopClosure.hpp
 g1RemSet.cpp				concurrentG1Refine.hpp
 g1RemSet.cpp				concurrentG1RefineThread.hpp