Merge
author johnc
Thu, 27 Jan 2011 13:42:28 -0800
changeset 8075 582dd25571b2
parent 8070 e649504b405b (current diff)
parent 8074 b5905b50b907 (diff)
child 8076 96d498ec7ae1
hotspot/src/share/vm/runtime/arguments.cpp
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Thu Jan 27 14:05:59 2011 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Thu Jan 27 13:42:28 2011 -0800
@@ -1055,7 +1055,12 @@
       do {
         double start_vtime_sec = os::elapsedVTime();
         double start_time_sec = os::elapsedTime();
-        the_task->do_marking_step(10.0);
+        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
+
+        the_task->do_marking_step(mark_step_duration_ms,
+                                  true /* do_stealing    */,
+                                  true /* do_termination */);
+
         double end_time_sec = os::elapsedTime();
         double end_vtime_sec = os::elapsedVTime();
         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
@@ -1111,7 +1116,8 @@
 
   _restart_for_overflow = false;
 
-  set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);
+  size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());
+  set_phase(active_workers, true /* concurrent */);
 
   CMConcurrentMarkingTask markingTask(this, cmThread());
   if (parallel_marking_threads() > 0)
@@ -1176,6 +1182,12 @@
                                /* silent */           false,
                                /* use_prev_marking */ false);
     }
+    assert(!restart_for_overflow(), "sanity");
+  }
+
+  // Reset the marking state if marking completed
+  if (!restart_for_overflow()) {
+    set_non_marking_state();
   }
 
 #if VERIFY_OBJS_PROCESSED
@@ -1500,21 +1512,19 @@
   size_t _max_live_bytes;
   size_t _regions_claimed;
   size_t _freed_bytes;
-  FreeRegionList _local_cleanup_list;
-  HumongousRegionSet _humongous_proxy_set;
+  FreeRegionList* _local_cleanup_list;
+  HumongousRegionSet* _humongous_proxy_set;
+  HRRSCleanupTask* _hrrs_cleanup_task;
   double _claimed_region_time;
   double _max_region_time;
 
 public:
   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
-                             int worker_num);
+                             int worker_num,
+                             FreeRegionList* local_cleanup_list,
+                             HumongousRegionSet* humongous_proxy_set,
+                             HRRSCleanupTask* hrrs_cleanup_task);
   size_t freed_bytes() { return _freed_bytes; }
-  FreeRegionList* local_cleanup_list() {
-    return &_local_cleanup_list;
-  }
-  HumongousRegionSet* humongous_proxy_set() {
-    return &_humongous_proxy_set;
-  }
 
   bool doHeapRegion(HeapRegion *r);
 
@@ -1541,7 +1551,12 @@
 
   void work(int i) {
     double start = os::elapsedTime();
-    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i);
+    FreeRegionList local_cleanup_list("Local Cleanup List");
+    HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
+    HRRSCleanupTask hrrs_cleanup_task;
+    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i, &local_cleanup_list,
+                                           &humongous_proxy_set,
+                                           &hrrs_cleanup_task);
     if (G1CollectedHeap::use_parallel_gc_threads()) {
       _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
                                             HeapRegion::NoteEndClaimValue);
@@ -1553,15 +1568,17 @@
     // Now update the lists
     _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
                                             NULL /* free_list */,
-                                            g1_note_end.humongous_proxy_set(),
+                                            &humongous_proxy_set,
                                             true /* par */);
     {
       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
       _max_live_bytes += g1_note_end.max_live_bytes();
       _freed_bytes += g1_note_end.freed_bytes();
 
-      _cleanup_list->add_as_tail(g1_note_end.local_cleanup_list());
-      assert(g1_note_end.local_cleanup_list()->is_empty(), "post-condition");
+      _cleanup_list->add_as_tail(&local_cleanup_list);
+      assert(local_cleanup_list.is_empty(), "post-condition");
+
+      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
     }
     double end = os::elapsedTime();
     if (G1PrintParCleanupStats) {
@@ -1602,13 +1619,17 @@
 
 G1NoteEndOfConcMarkClosure::
 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
-                           int worker_num)
+                           int worker_num,
+                           FreeRegionList* local_cleanup_list,
+                           HumongousRegionSet* humongous_proxy_set,
+                           HRRSCleanupTask* hrrs_cleanup_task)
   : _g1(g1), _worker_num(worker_num),
     _max_live_bytes(0), _regions_claimed(0),
     _freed_bytes(0),
     _claimed_region_time(0.0), _max_region_time(0.0),
-    _local_cleanup_list("Local Cleanup List"),
-    _humongous_proxy_set("Local Cleanup Humongous Proxy Set") { }
+    _local_cleanup_list(local_cleanup_list),
+    _humongous_proxy_set(humongous_proxy_set),
+    _hrrs_cleanup_task(hrrs_cleanup_task) { }
 
 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *hr) {
   // We use a claim value of zero here because all regions
@@ -1619,11 +1640,12 @@
     _regions_claimed++;
     hr->note_end_of_marking();
     _max_live_bytes += hr->max_live_bytes();
-    _g1->free_region_if_totally_empty(hr,
-                                      &_freed_bytes,
-                                      &_local_cleanup_list,
-                                      &_humongous_proxy_set,
-                                      true /* par */);
+    _g1->free_region_if_empty(hr,
+                              &_freed_bytes,
+                              _local_cleanup_list,
+                              _humongous_proxy_set,
+                              _hrrs_cleanup_task,
+                              true /* par */);
     double region_time = (os::elapsedTime() - start);
     _claimed_region_time += region_time;
     if (region_time > _max_region_time) _max_region_time = region_time;
@@ -1659,6 +1681,8 @@
 
   double start = os::elapsedTime();
 
+  HeapRegionRemSet::reset_for_cleanup_tasks();
+
   // Do counting once more with the world stopped for good measure.
   G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
                                         &_region_bm, &_card_bm);
@@ -1853,6 +1877,8 @@
   assert(local_free_list.is_empty(), "post-condition");
 }
 
+// Support closures for reference processing in G1
+
 bool G1CMIsAliveClosure::do_object_b(oop obj) {
   HeapWord* addr = (HeapWord*)obj;
   return addr != NULL &&
@@ -1873,11 +1899,17 @@
   virtual void do_oop(      oop* p) { do_oop_work(p); }
 
   template <class T> void do_oop_work(T* p) {
-    oop thisOop = oopDesc::load_decode_heap_oop(p);
-    HeapWord* addr = (HeapWord*)thisOop;
-    if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
+    oop obj = oopDesc::load_decode_heap_oop(p);
+    HeapWord* addr = (HeapWord*)obj;
+
+    if (_cm->verbose_high())
+      gclog_or_tty->print_cr("\t[0] we're looking at location "
+                               "*"PTR_FORMAT" = "PTR_FORMAT,
+                               p, (void*) obj);
+
+    if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
       _bitMap->mark(addr);
-      _cm->mark_stack_push(thisOop);
+      _cm->mark_stack_push(obj);
     }
   }
 };
@@ -1899,6 +1931,199 @@
   }
 };
 
+// 'Keep Alive' closure used by parallel reference processing.
+// An instance of this closure is used in the parallel reference processing
+// code rather than an instance of G1CMKeepAliveClosure. We could have used
+// G1CMKeepAliveClosure, as it is MT-safe. Also, reference objects are
+// placed onto discovered ref lists only once, so we can mark and push them
+// without needing to check whether an object has already been marked. Using
+// G1CMKeepAliveClosure would mean, however, having all the worker threads
+// operate on the global mark stack: an individual worker would be doing
+// lock-free pushes while it processes its own discovered ref list, followed
+// by a drain call. If the discovered ref lists are unbalanced, this could
+// cause interference with the other workers. Using a CMTask (and its
+// embedded local data structures) avoids that potential interference.
+class G1CMParKeepAliveAndDrainClosure: public OopClosure {
+  ConcurrentMark*  _cm;
+  CMTask*          _task;
+  CMBitMap*        _bitMap;
+  int              _ref_counter_limit;
+  int              _ref_counter;
+ public:
+  G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm,
+                                  CMTask* task,
+                                  CMBitMap* bitMap) :
+    _cm(cm), _task(task), _bitMap(bitMap),
+    _ref_counter_limit(G1RefProcDrainInterval)
+  {
+    assert(_ref_counter_limit > 0, "sanity");
+    _ref_counter = _ref_counter_limit;
+  }
+
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(      oop* p) { do_oop_work(p); }
+
+  template <class T> void do_oop_work(T* p) {
+    if (!_cm->has_overflown()) {
+      oop obj = oopDesc::load_decode_heap_oop(p);
+      if (_cm->verbose_high())
+        gclog_or_tty->print_cr("\t[%d] we're looking at location "
+                               "*"PTR_FORMAT" = "PTR_FORMAT,
+                               _task->task_id(), p, (void*) obj);
+
+      _task->deal_with_reference(obj);
+      _ref_counter--;
+
+      if (_ref_counter == 0) {
+        // We have dealt with _ref_counter_limit references, pushing them and
+        // objects reachable from them onto the local stack (and possibly the
+        // global stack). Call do_marking_step() to process these entries. We
+        // call the routine in a loop, which we'll exit if there's nothing
+        // more to do (i.e. we're done with the entries that we've pushed as
+        // a result of the deal_with_reference calls above) or we overflow.
+        // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
+        // while there may still be some work to do. (See the comment at the
+        // beginning of CMTask::do_marking_step() for those conditions - one of which
+        // is reaching the specified time target.) It is only when
+        // CMTask::do_marking_step() returns without setting the has_aborted() flag
+        // that the marking has completed.
+        do {
+          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
+          _task->do_marking_step(mark_step_duration_ms,
+                                 false /* do_stealing    */,
+                                 false /* do_termination */);
+        } while (_task->has_aborted() && !_cm->has_overflown());
+        _ref_counter = _ref_counter_limit;
+      }
+    } else {
+       if (_cm->verbose_high())
+         gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
+    }
+  }
+};
+
+class G1CMParDrainMarkingStackClosure: public VoidClosure {
+  ConcurrentMark* _cm;
+  CMTask* _task;
+ public:
+  G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
+    _cm(cm), _task(task)
+  {}
+
+  void do_void() {
+    do {
+      if (_cm->verbose_high())
+        gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step", _task->task_id());
+
+      // We call CMTask::do_marking_step() to completely drain the local and
+      // global marking stacks. The routine is called in a loop, which we'll
+      // exit if there's nothing more to do (i.e. we've completely drained the
+      // entries that were pushed as a result of applying the
+      // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
+      // lists above) or we overflow the global marking stack.
+      // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
+      // while there may still be some work to do. (See the comment at the
+      // beginning of CMTask::do_marking_step() for those conditions - one of which
+      // is reaching the specified time target.) It is only when
+      // CMTask::do_marking_step() returns without setting the has_aborted() flag
+      // that the marking has completed.
+
+      _task->do_marking_step(1000000000.0 /* something very large */,
+                             true /* do_stealing    */,
+                             true /* do_termination */);
+    } while (_task->has_aborted() && !_cm->has_overflown());
+  }
+};
+
+// Implementation of AbstractRefProcTaskExecutor for G1
+class G1RefProcTaskExecutor: public AbstractRefProcTaskExecutor {
+private:
+  G1CollectedHeap* _g1h;
+  ConcurrentMark*  _cm;
+  CMBitMap*        _bitmap;
+  WorkGang*        _workers;
+  int              _active_workers;
+
+public:
+  G1RefProcTaskExecutor(G1CollectedHeap* g1h,
+                        ConcurrentMark* cm,
+                        CMBitMap* bitmap,
+                        WorkGang* workers,
+                        int n_workers) :
+    _g1h(g1h), _cm(cm), _bitmap(bitmap),
+    _workers(workers), _active_workers(n_workers)
+  { }
+
+  // Executes the given task using concurrent marking worker threads.
+  virtual void execute(ProcessTask& task);
+  virtual void execute(EnqueueTask& task);
+};
+
+class G1RefProcTaskProxy: public AbstractGangTask {
+  typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
+  ProcessTask&     _proc_task;
+  G1CollectedHeap* _g1h;
+  ConcurrentMark*  _cm;
+  CMBitMap*        _bitmap;
+
+public:
+  G1RefProcTaskProxy(ProcessTask& proc_task,
+                     G1CollectedHeap* g1h,
+                     ConcurrentMark* cm,
+                     CMBitMap* bitmap) :
+    AbstractGangTask("Process reference objects in parallel"),
+    _proc_task(proc_task), _g1h(g1h), _cm(cm), _bitmap(bitmap)
+  {}
+
+  virtual void work(int i) {
+    CMTask* marking_task = _cm->task(i);
+    G1CMIsAliveClosure g1_is_alive(_g1h);
+    G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task, _bitmap);
+    G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
+
+    _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain);
+  }
+};
+
+void G1RefProcTaskExecutor::execute(ProcessTask& proc_task) {
+  assert(_workers != NULL, "Need parallel worker threads.");
+
+  G1RefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm, _bitmap);
+
+  // We need to reset the phase for each task execution so that
+  // the termination protocol of CMTask::do_marking_step works.
+  _cm->set_phase(_active_workers, false /* concurrent */);
+  _g1h->set_par_threads(_active_workers);
+  _workers->run_task(&proc_task_proxy);
+  _g1h->set_par_threads(0);
+}
+
+class G1RefEnqueueTaskProxy: public AbstractGangTask {
+  typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
+  EnqueueTask& _enq_task;
+
+public:
+  G1RefEnqueueTaskProxy(EnqueueTask& enq_task) :
+    AbstractGangTask("Enqueue reference objects in parallel"),
+    _enq_task(enq_task)
+  { }
+
+  virtual void work(int i) {
+    _enq_task.work(i);
+  }
+};
+
+void G1RefProcTaskExecutor::execute(EnqueueTask& enq_task) {
+  assert(_workers != NULL, "Need parallel worker threads.");
+
+  G1RefEnqueueTaskProxy enq_task_proxy(enq_task);
+
+  _g1h->set_par_threads(_active_workers);
+  _workers->run_task(&enq_task_proxy);
+  _g1h->set_par_threads(0);
+}
+
 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
   ResourceMark rm;
   HandleMark   hm;
@@ -1917,18 +2142,52 @@
   G1CMDrainMarkingStackClosure
     g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);
 
-  // XXXYYY  Also: copy the parallel ref processing code from CMS.
-  rp->process_discovered_references(&g1_is_alive,
-                                    &g1_keep_alive,
-                                    &g1_drain_mark_stack,
-                                    NULL);
+  // We use the work gang from the G1CollectedHeap and we utilize all
+  // the worker threads.
+  int active_workers = MAX2(MIN2(g1h->workers()->total_workers(), (int)_max_task_num), 1);
+
+  G1RefProcTaskExecutor par_task_executor(g1h, this, nextMarkBitMap(),
+                                          g1h->workers(), active_workers);
+
+  if (rp->processing_is_mt()) {
+    // Set the degree of MT here.  If the discovery is done MT, there
+    // may have been a different number of threads doing the discovery
+    // and a different number of discovered lists may have Ref objects.
+    // That is OK as long as the Reference lists are balanced (see
+    // balance_all_queues() and balance_queues()).
+    rp->set_mt_degree(active_workers);
+
+    rp->process_discovered_references(&g1_is_alive,
+                                      &g1_keep_alive,
+                                      &g1_drain_mark_stack,
+                                      &par_task_executor);
+
+    // The work routines of the parallel keep_alive and drain_marking_stack
+    // will set the has_overflown flag if we overflow the global marking
+    // stack.
+  } else {
+    rp->process_discovered_references(&g1_is_alive,
+                                      &g1_keep_alive,
+                                      &g1_drain_mark_stack,
+                                      NULL);
+
+  }
+
   assert(_markStack.overflow() || _markStack.isEmpty(),
-         "mark stack should be empty (unless it overflowed)");
+      "mark stack should be empty (unless it overflowed)");
   if (_markStack.overflow()) {
+    // This should already have been done when we tried to push an
+    // entry onto the global mark stack. But let's do it again.
     set_has_overflown();
   }
 
-  rp->enqueue_discovered_references();
+  if (rp->processing_is_mt()) {
+    assert(rp->num_q() == active_workers, "sanity");
+    rp->enqueue_discovered_references(&par_task_executor);
+  } else {
+    rp->enqueue_discovered_references();
+  }
+
   rp->verify_no_references_recorded();
   assert(!rp->discovery_enabled(), "should have been disabled");
 
@@ -1955,7 +2214,9 @@
       CMTask* task = _cm->task(worker_i);
       task->record_start_time();
       do {
-        task->do_marking_step(1000000000.0 /* something very large */);
+        task->do_marking_step(1000000000.0 /* something very large */,
+                              true /* do_stealing    */,
+                              true /* do_termination */);
       } while (task->has_aborted() && !_cm->has_overflown());
       // If we overflow, then we do not want to restart. We instead
       // want to abort remark and do concurrent marking again.
@@ -1978,7 +2239,7 @@
     G1CollectedHeap::StrongRootsScope srs(g1h);
     // this is remark, so we'll use up all available threads
     int active_workers = ParallelGCThreads;
-    set_phase(active_workers, false);
+    set_phase(active_workers, false /* concurrent */);
 
     CMRemarkTask remarkTask(this);
     // We will start all available threads, even if we decide that the
@@ -1992,7 +2253,7 @@
     G1CollectedHeap::StrongRootsScope srs(g1h);
     // this is remark, so we'll use up all available threads
     int active_workers = 1;
-    set_phase(active_workers, false);
+    set_phase(active_workers, false /* concurrent */);
 
     CMRemarkTask remarkTask(this);
     // We will start all available threads, even if we decide that the
@@ -2005,9 +2266,6 @@
 
   print_stats();
 
-  if (!restart_for_overflow())
-    set_non_marking_state();
-
 #if VERIFY_OBJS_PROCESSED
   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
@@ -3124,7 +3382,7 @@
           // do nothing
         }
 #else // _CHECK_BOTH_FINGERS_
-      // we will only check the global finger
+        // we will only check the global finger
 
         if (objAddr < global_finger) {
           // see long comment above
@@ -3249,7 +3507,7 @@
   double elapsed_time_ms = curr_time_ms - _start_time_ms;
   if (elapsed_time_ms > _time_target_ms) {
     set_has_aborted();
-    _has_aborted_timed_out = true;
+    _has_timed_out = true;
     statsOnly( ++_aborted_timed_out );
     return;
   }
@@ -3754,7 +4012,9 @@
 
  *****************************************************************************/
 
-void CMTask::do_marking_step(double time_target_ms) {
+void CMTask::do_marking_step(double time_target_ms,
+                             bool do_stealing,
+                             bool do_termination) {
   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
   assert(concurrent() == _cm->concurrent(), "they should be the same");
 
@@ -3794,7 +4054,7 @@
 
   // clear all flags
   clear_has_aborted();
-  _has_aborted_timed_out = false;
+  _has_timed_out = false;
   _draining_satb_buffers = false;
 
   ++_calls;
@@ -3970,7 +4230,7 @@
   drain_global_stack(false);
 
   // Attempt at work stealing from other task's queues.
-  if (!has_aborted()) {
+  if (do_stealing && !has_aborted()) {
     // We have not aborted. This means that we have finished all that
     // we could. Let's try to do some stealing...
 
@@ -4011,7 +4271,7 @@
 
   // We still haven't aborted. Now, let's try to get into the
   // termination protocol.
-  if (!has_aborted()) {
+  if (do_termination && !has_aborted()) {
     // We cannot check whether the global stack is empty, since other
     // tasks might be concurrently pushing objects on it. We also cannot
     // check if the region stack is empty because if a thread is aborting
@@ -4087,7 +4347,7 @@
 
     statsOnly( ++_aborted );
 
-    if (_has_aborted_timed_out) {
+    if (_has_timed_out) {
       double diff_ms = elapsed_time_ms - _time_target_ms;
       // Keep statistics of how well we did with respect to hitting
       // our target only if we actually timed out (if we aborted for
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Thu Jan 27 14:05:59 2011 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Thu Jan 27 13:42:28 2011 -0800
@@ -353,6 +353,10 @@
   friend class CMConcurrentMarkingTask;
   friend class G1ParNoteEndTask;
   friend class CalcLiveObjectsClosure;
+  friend class G1RefProcTaskProxy;
+  friend class G1RefProcTaskExecutor;
+  friend class G1CMParKeepAliveAndDrainClosure;
+  friend class G1CMParDrainMarkingStackClosure;
 
 protected:
   ConcurrentMarkThread* _cmThread;   // the thread doing the work
@@ -936,7 +940,7 @@
   // if this is true, then the task has aborted for some reason
   bool                        _has_aborted;
   // set when the task aborts because it has met its time quota
-  bool                        _has_aborted_timed_out;
+  bool                        _has_timed_out;
   // true when we're draining SATB buffers; this avoids the task
   // aborting due to SATB buffers being available (as we're already
   // dealing with them)
@@ -1041,7 +1045,7 @@
   // trying not to exceed the given duration. However, it might exit
   // prematurely, according to some conditions (i.e. SATB buffers are
   // available for processing).
-  void do_marking_step(double target_ms);
+  void do_marking_step(double target_ms, bool do_stealing, bool do_termination);
 
   // These two calls start and stop the timer
   void record_start_time() {
@@ -1063,7 +1067,8 @@
   bool has_aborted()            { return _has_aborted; }
   void set_has_aborted()        { _has_aborted = true; }
   void clear_has_aborted()      { _has_aborted = false; }
-  bool claimed() { return _claimed; }
+  bool has_timed_out()          { return _has_timed_out; }
+  bool claimed()                { return _claimed; }
 
   // Support routines for the partially scanned region that may be
   // recorded as a result of aborting while draining the CMRegionStack
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp	Thu Jan 27 14:05:59 2011 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp	Thu Jan 27 13:42:28 2011 -0800
@@ -251,7 +251,9 @@
 
         // Now do the remainder of the cleanup operation.
         _cm->completeCleanup();
+        _sts.join();
         g1_policy->record_concurrent_mark_cleanup_completed();
+        _sts.leave();
 
         double cleanup_end_sec = os::elapsedTime();
         if (PrintGC) {
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Thu Jan 27 14:05:59 2011 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Thu Jan 27 13:42:28 2011 -0800
@@ -4925,10 +4925,11 @@
   COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
 }
 
-void G1CollectedHeap::free_region_if_totally_empty(HeapRegion* hr,
+void G1CollectedHeap::free_region_if_empty(HeapRegion* hr,
                                      size_t* pre_used,
                                      FreeRegionList* free_list,
                                      HumongousRegionSet* humongous_proxy_set,
+                                     HRRSCleanupTask* hrrs_cleanup_task,
                                      bool par) {
   if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
     if (hr->isHumongous()) {
@@ -4937,6 +4938,8 @@
     } else {
       free_region(hr, pre_used, free_list, par);
     }
+  } else {
+    hr->rem_set()->do_cleanup_work(hrrs_cleanup_task);
   }
 }
 
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Thu Jan 27 14:05:59 2011 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Thu Jan 27 13:42:28 2011 -0800
@@ -40,6 +40,7 @@
 
 class HeapRegion;
 class HeapRegionSeq;
+class HRRSCleanupTask;
 class PermanentGenerationSpec;
 class GenerationSpec;
 class OopsInHeapRegionClosure;
@@ -1099,11 +1100,12 @@
   // all dead. It calls either free_region() or
   // free_humongous_region() depending on the type of the region that
   // is passed to it.
-  void free_region_if_totally_empty(HeapRegion* hr,
-                                    size_t* pre_used,
-                                    FreeRegionList* free_list,
-                                    HumongousRegionSet* humongous_proxy_set,
-                                    bool par);
+  void free_region_if_empty(HeapRegion* hr,
+                            size_t* pre_used,
+                            FreeRegionList* free_list,
+                            HumongousRegionSet* humongous_proxy_set,
+                            HRRSCleanupTask* hrrs_cleanup_task,
+                            bool par);
 
   // It appends the free list to the master free list and updates the
   // master humongous list according to the contents of the proxy
--- a/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp	Thu Jan 27 14:05:59 2011 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp	Thu Jan 27 13:42:28 2011 -0800
@@ -81,6 +81,14 @@
   product(intx, G1MarkRegionStackSize, 1024 * 1024,                         \
           "Size of the region stack for concurrent marking.")               \
                                                                             \
+  product(double, G1ConcMarkStepDurationMillis, 10.0,                       \
+          "Target duration of individual concurrent marking steps "         \
+          "in milliseconds.")                                               \
+                                                                            \
+  product(intx, G1RefProcDrainInterval, 10,                                 \
+          "The number of discovered reference objects to process before "   \
+          "draining concurrent marking work queues.")                       \
+                                                                            \
   develop(bool, G1SATBBarrierPrintNullPreVals, false,                       \
           "If true, count frac of ptr writes with null pre-vals.")          \
                                                                             \
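
Both new flags are product flags with the defaults shown above, so on a JVM built from this tree they could be tuned directly on the command line. A hypothetical invocation spelling out the defaults (app.jar is a placeholder, and some builds of this era may additionally require -XX:+UnlockExperimentalVMOptions for G1):

    java -XX:+UseG1GC \
         -XX:G1ConcMarkStepDurationMillis=10.0 \
         -XX:G1RefProcDrainInterval=10 \
         -jar app.jar

The arguments.cpp hunk at the end of this changeset rejects values below 1 for either flag.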
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Thu Jan 27 14:05:59 2011 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Thu Jan 27 13:42:28 2011 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -463,7 +463,6 @@
   }
 
   static void par_contract_all();
-
 };
 
 void PosParPRT::par_contract_all() {
@@ -1070,6 +1069,11 @@
 
 }
 
+void
+OtherRegionsTable::do_cleanup_work(HRRSCleanupTask* hrrs_cleanup_task) {
+  _sparse_table.do_cleanup_work(hrrs_cleanup_task);
+}
+
 // Determines how many threads can add records to an rset in parallel.
 // This can be done by either mutator threads together with the
 // concurrent refinement threads or GC threads.
@@ -1384,6 +1388,19 @@
   }
 }
 
+void HeapRegionRemSet::reset_for_cleanup_tasks() {
+  SparsePRT::reset_for_cleanup_tasks();
+}
+
+void HeapRegionRemSet::do_cleanup_work(HRRSCleanupTask* hrrs_cleanup_task) {
+  _other_regions.do_cleanup_work(hrrs_cleanup_task);
+}
+
+void
+HeapRegionRemSet::finish_cleanup_task(HRRSCleanupTask* hrrs_cleanup_task) {
+  SparsePRT::finish_cleanup_task(hrrs_cleanup_task);
+}
+
 #ifndef PRODUCT
 void HeapRegionRemSet::test() {
   os::sleep(Thread::current(), (jlong)5000, false);
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Thu Jan 27 14:05:59 2011 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Thu Jan 27 13:42:28 2011 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -38,6 +38,10 @@
 class PosParPRT;
 class SparsePRT;
 
+// Essentially a wrapper around SparsePRTCleanupTask. See
+// sparsePRT.hpp for more details.
+class HRRSCleanupTask : public SparsePRTCleanupTask {
+};
 
 // The "_coarse_map" is a bitmap with one bit for each region, where set
 // bits indicate that the corresponding region may contain some pointer
@@ -156,6 +160,8 @@
   // "from_hr" is being cleared; remove any entries from it.
   void clear_incoming_entry(HeapRegion* from_hr);
 
+  void do_cleanup_work(HRRSCleanupTask* hrrs_cleanup_task);
+
   // Declare the heap size (in # of regions) to the OtherRegionsTable.
   // (Uses it to initialize from_card_cache).
   static void init_from_card_cache(size_t max_regions);
@@ -165,10 +171,8 @@
   static void shrink_from_card_cache(size_t new_n_regs);
 
   static void print_from_card_cache();
-
 };
 
-
 class HeapRegionRemSet : public CHeapObj {
   friend class VMStructs;
   friend class HeapRegionRemSetIterator;
@@ -342,11 +346,16 @@
   static void print_recorded();
   static void record_event(Event evnt);
 
+  // These are wrappers for the similarly-named methods on
+  // SparsePRT. Look at sparsePRT.hpp for more details.
+  static void reset_for_cleanup_tasks();
+  void do_cleanup_work(HRRSCleanupTask* hrrs_cleanup_task);
+  static void finish_cleanup_task(HRRSCleanupTask* hrrs_cleanup_task);
+
   // Run unit tests.
 #ifndef PRODUCT
   static void test();
 #endif
-
 };
 
 class HeapRegionRemSetIterator : public CHeapObj {
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegionSeq.cpp	Thu Jan 27 14:05:59 2011 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionSeq.cpp	Thu Jan 27 13:42:28 2011 -0800
@@ -195,10 +195,10 @@
     assert(0 <= res && res < _regions.length(),
            err_msg("res: %d should be valid", res));
     _alloc_search_start = res + (int) num;
+    assert(0 < _alloc_search_start && _alloc_search_start <= _regions.length(),
+           err_msg("_alloc_search_start: %d should be valid",
+                   _alloc_search_start));
   }
-  assert(0 < _alloc_search_start && _alloc_search_start <= _regions.length(),
-         err_msg("_alloc_search_start: %d should be valid",
-                 _alloc_search_start));
   return res;
 }
 
--- a/hotspot/src/share/vm/gc_implementation/g1/sparsePRT.cpp	Thu Jan 27 14:05:59 2011 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/sparsePRT.cpp	Thu Jan 27 13:42:28 2011 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -415,6 +415,38 @@
   return NULL;
 }
 
+void SparsePRT::reset_for_cleanup_tasks() {
+  _head_expanded_list = NULL;
+}
+
+void SparsePRT::do_cleanup_work(SparsePRTCleanupTask* sprt_cleanup_task) {
+  if (should_be_on_expanded_list()) {
+    sprt_cleanup_task->add(this);
+  }
+}
+
+void SparsePRT::finish_cleanup_task(SparsePRTCleanupTask* sprt_cleanup_task) {
+  assert(ParGCRareEvent_lock->owned_by_self(), "pre-condition");
+  SparsePRT* head = sprt_cleanup_task->head();
+  SparsePRT* tail = sprt_cleanup_task->tail();
+  if (head != NULL) {
+    assert(tail != NULL, "if head is not NULL, so should tail");
+
+    tail->set_next_expanded(_head_expanded_list);
+    _head_expanded_list = head;
+  } else {
+    assert(tail == NULL, "if head is NULL, so should tail");
+  }
+}
+
+bool SparsePRT::should_be_on_expanded_list() {
+  if (_expanded) {
+    assert(_cur != _next, "if _expanded is true, cur should be != _next");
+  } else {
+    assert(_cur == _next, "if _expanded is false, cur should be == _next");
+  }
+  return expanded();
+}
 
 void SparsePRT::cleanup_all() {
   // First clean up all expanded tables so they agree on next and cur.
@@ -484,6 +516,7 @@
     _cur->clear();
   }
   _next = _cur;
+  _expanded = false;
 }
 
 void SparsePRT::cleanup() {
@@ -518,3 +551,15 @@
   }
   add_to_expanded_list(this);
 }
+
+void SparsePRTCleanupTask::add(SparsePRT* sprt) {
+  assert(sprt->should_be_on_expanded_list(), "pre-condition");
+
+  sprt->set_next_expanded(NULL);
+  if (_tail != NULL) {
+    _tail->set_next_expanded(sprt);
+  } else {
+    _head = sprt;
+  }
+  _tail = sprt;
+}
--- a/hotspot/src/share/vm/gc_implementation/g1/sparsePRT.hpp	Thu Jan 27 14:05:59 2011 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/sparsePRT.hpp	Thu Jan 27 13:42:28 2011 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -212,8 +212,11 @@
 // mutex.
 
 class SparsePRTIter;
+class SparsePRTCleanupTask;
 
 class SparsePRT VALUE_OBJ_CLASS_SPEC {
+  friend class SparsePRTCleanupTask;
+
   //  Iterations are done on the _cur hash table, since they only need to
   //  see entries visible at the start of a collection pause.
   //  All other operations are done using the _next hash table.
@@ -238,6 +241,8 @@
   SparsePRT* next_expanded() { return _next_expanded; }
   void set_next_expanded(SparsePRT* nxt) { _next_expanded = nxt; }
 
+  bool should_be_on_expanded_list();
+
   static SparsePRT* _head_expanded_list;
 
 public:
@@ -284,12 +289,36 @@
   static void add_to_expanded_list(SparsePRT* sprt);
   static SparsePRT* get_from_expanded_list();
 
+  // The purpose of these three methods is to help the GC workers
+  // during the cleanup pause to recreate the expanded list, purging
+  // any tables from it that belong to regions that are freed during
+  // cleanup (if we don't purge those tables, there is a race that
+  // causes various crashes; see CR 7014261).
+  //
+  // We chose to recreate the expanded list, instead of purging
+  // entries from it by iterating over it, to avoid a serial phase
+  // at the end of the cleanup pause.
+  //
+  // The three methods below work as follows:
+  // * reset_for_cleanup_tasks() : Nulls the expanded list head at the
+  //   start of the cleanup pause.
+  // * do_cleanup_work() : Called by the cleanup workers for every
+  //   region that is neither free nor being freed by the cleanup
+  //   pause. It builds a list of expanded tables whose head and tail
+  //   are kept on the thread-local SparsePRTCleanupTask object.
+  // * finish_cleanup_task() : Called by each cleanup worker after it
+  //   completes its cleanup task. It splices the worker's local list
+  //   into the global expanded list. It assumes that the
+  //   ParGCRareEvent_lock is held, to ensure MT-safety.
+  static void reset_for_cleanup_tasks();
+  void do_cleanup_work(SparsePRTCleanupTask* sprt_cleanup_task);
+  static void finish_cleanup_task(SparsePRTCleanupTask* sprt_cleanup_task);
+
   bool contains_card(RegionIdx_t region_id, CardIdx_t card_index) const {
     return _next->contains_card(region_id, card_index);
   }
 };
 
-
 class SparsePRTIter: public RSHashTableIter {
 public:
   void init(const SparsePRT* sprt) {
@@ -300,4 +329,22 @@
   }
 };
 
+// This allows each worker during a cleanup pause to create a
+// thread-local list of sparse tables that have been expanded and need
+// to be processed at the beginning of the next GC pause. These lists
+// are concatenated into the single expanded list at the end of the
+// cleanup pause.
+class SparsePRTCleanupTask VALUE_OBJ_CLASS_SPEC {
+private:
+  SparsePRT* _head;
+  SparsePRT* _tail;
+
+public:
+  SparsePRTCleanupTask() : _head(NULL), _tail(NULL) { }
+
+  void add(SparsePRT* sprt);
+  SparsePRT* head() { return _head; }
+  SparsePRT* tail() { return _tail; }
+};
+
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_SPARSEPRT_HPP
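
Taken together, the per-worker protocol these hooks assume is the one G1ParNoteEndTask::work() follows in the concurrentMark.cpp hunks above. A condensed sketch (HotSpot-internal types, so not compilable standalone; the region-claiming loop is elided):

    // Once per cleanup pause, before the workers start
    // (see ConcurrentMark::cleanup() above):
    HeapRegionRemSet::reset_for_cleanup_tasks();

    // In each cleanup worker:
    HRRSCleanupTask hrrs_cleanup_task;                   // thread-local head/tail list
    // ... for every claimed region hr that is not freed:
    hr->rem_set()->do_cleanup_work(&hrrs_cleanup_task);  // may append hr's SparsePRT
    // ... after the worker's share of regions is done:
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);  // splice into global list
    }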
--- a/hotspot/src/share/vm/runtime/arguments.cpp	Thu Jan 27 14:05:59 2011 -0500
+++ b/hotspot/src/share/vm/runtime/arguments.cpp	Thu Jan 27 13:42:28 2011 -0800
@@ -1941,10 +1941,16 @@
     status = false;
   }
 
+#ifndef SERIALGC
   if (UseG1GC) {
     status = status && verify_percentage(InitiatingHeapOccupancyPercent,
                                          "InitiatingHeapOccupancyPercent");
+    status = status && verify_min_value(G1RefProcDrainInterval, 1,
+                                        "G1RefProcDrainInterval");
+    status = status && verify_min_value((intx)G1ConcMarkStepDurationMillis, 1,
+                                        "G1ConcMarkStepDurationMillis");
   }
+#endif
 
   status = status && verify_interval(RefDiscoveryPolicy,
                                      ReferenceProcessor::DiscoveryPolicyMin,