6941395: G1: Use only lock-free versions of region stack push() and pop()
authorjohnc
Tue, 28 Sep 2010 09:51:37 -0700
changeset 6761 f9191297ce83
parent 6760 532b60549a90
child 6762 f8d1b560700e
6941395: G1: Use only lock-free versions of region stack push() and pop() Summary: Re-enable use of the lock-free versions of region stack push() and pop() by recording aborted regions in a thread-local structure, which are then processed when scanning of the region stack restarts. The previous locking versions of these routines are retained for diagnostic purposes. Reviewed-by: tonyp, ysr
hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Wed Sep 08 16:10:51 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Tue Sep 28 09:51:37 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -278,15 +278,16 @@
   if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
 }
 
-void CMRegionStack::push(MemRegion mr) {
+void CMRegionStack::push_lock_free(MemRegion mr) {
   assert(mr.word_size() > 0, "Precondition");
   while (true) {
-    if (isFull()) {
+    jint index = _index;
+
+    if (index >= _capacity) {
       _overflow = true;
       return;
     }
     // Otherwise...
-    jint index = _index;
     jint next_index = index+1;
     jint res = Atomic::cmpxchg(next_index, &_index, index);
     if (res == index) {
@@ -297,19 +298,17 @@
   }
 }
 
-// Currently we do not call this at all. Normally we would call it
-// during the concurrent marking / remark phases but we now call
-// the lock-based version instead. But we might want to resurrect this
-// code in the future. So, we'll leave it here commented out.
-#if 0
-MemRegion CMRegionStack::pop() {
+// Lock-free pop of the region stack. Called during the concurrent
+// marking / remark phases. Should only be called in tandem with
+// other lock-free pops.
+MemRegion CMRegionStack::pop_lock_free() {
   while (true) {
-    // Otherwise...
     jint index = _index;
 
     if (index == 0) {
       return MemRegion();
     }
+    // Otherwise...
     jint next_index = index-1;
     jint res = Atomic::cmpxchg(next_index, &_index, index);
     if (res == index) {
@@ -326,7 +325,11 @@
     // Otherwise, we need to try again.
   }
 }
-#endif // 0
+
+#if 0
+// The routines that manipulate the region stack with a lock are
+// not currently used. They should be retained, however, as a
+// diagnostic aid.
 
 void CMRegionStack::push_with_lock(MemRegion mr) {
   assert(mr.word_size() > 0, "Precondition");
@@ -361,6 +364,7 @@
     }
   }
 }
+#endif
 
 bool CMRegionStack::invalidate_entries_into_cset() {
   bool result = false;
@@ -648,8 +652,9 @@
   // We do reset all of them, since different phases will use
   // different number of active threads. So, it's easiest to have all
   // of them ready.
-  for (int i = 0; i < (int) _max_task_num; ++i)
+  for (int i = 0; i < (int) _max_task_num; ++i) {
     _tasks[i]->reset(_nextMarkBitMap);
+  }
 
   // we need this to make sure that the flag is on during the evac
   // pause with initial mark piggy-backed
@@ -988,7 +993,7 @@
                              "below the finger, pushing it",
                              mr.start(), mr.end());
 
-    if (!region_stack_push(mr)) {
+    if (!region_stack_push_lock_free(mr)) {
       if (verbose_low())
         gclog_or_tty->print_cr("[global] region stack has overflown.");
     }
@@ -2333,6 +2338,39 @@
   return NULL;
 }
 
+bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
+  bool result = false;
+  for (int i = 0; i < (int)_max_task_num; ++i) {
+    CMTask* the_task = _tasks[i];
+    MemRegion mr = the_task->aborted_region();
+    if (mr.start() != NULL) {
+      assert(mr.end() != NULL, "invariant");
+      assert(mr.word_size() > 0, "invariant");
+      HeapRegion* hr = _g1h->heap_region_containing(mr.start());
+      assert(hr != NULL, "invariant");
+      if (hr->in_collection_set()) {
+        // The region points into the collection set
+        the_task->set_aborted_region(MemRegion());
+        result = true;
+      }
+    }
+  }
+  return result;
+}
+
+bool ConcurrentMark::has_aborted_regions() {
+  for (int i = 0; i < (int)_max_task_num; ++i) {
+    CMTask* the_task = _tasks[i];
+    MemRegion mr = the_task->aborted_region();
+    if (mr.start() != NULL) {
+      assert(mr.end() != NULL, "invariant");
+      assert(mr.word_size() > 0, "invariant");
+      return true;
+    }
+  }
+  return false;
+}
+
 void ConcurrentMark::oops_do(OopClosure* cl) {
   if (_markStack.size() > 0 && verbose_low())
     gclog_or_tty->print_cr("[global] scanning the global marking stack, "
@@ -2351,13 +2389,22 @@
     queue->oops_do(cl);
   }
 
-  // finally, invalidate any entries that in the region stack that
+  // Invalidate any entries, that are in the region stack, that
   // point into the collection set
   if (_regionStack.invalidate_entries_into_cset()) {
     // otherwise, any gray objects copied during the evacuation pause
     // might not be visited.
     assert(_should_gray_objects, "invariant");
   }
+
+  // Invalidate any aborted regions, recorded in the individual CM
+  // tasks, that point into the collection set.
+  if (invalidate_aborted_regions_in_cset()) {
+    // otherwise, any gray objects copied during the evacuation pause
+    // might not be visited.
+    assert(_should_gray_objects, "invariant");
+  }
+
 }
 
 void ConcurrentMark::clear_marking_state() {
@@ -2638,7 +2685,7 @@
   // irrespective whether all collection set regions are below the
   // finger, if the region stack is not empty. This is expected to be
   // a rare case, so I don't think it's necessary to be smarted about it.
-  if (!region_stack_empty())
+  if (!region_stack_empty() || has_aborted_regions())
     _should_gray_objects = true;
 }
 
@@ -2657,8 +2704,10 @@
   _nextMarkBitMap->clearAll();
   // Empty mark stack
   clear_marking_state();
-  for (int i = 0; i < (int)_max_task_num; ++i)
+  for (int i = 0; i < (int)_max_task_num; ++i) {
     _tasks[i]->clear_region_fields();
+    _tasks[i]->clear_aborted_region();
+  }
   _has_aborted = true;
 
   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
@@ -2936,6 +2985,7 @@
 
   _nextMarkBitMap                = nextMarkBitMap;
   clear_region_fields();
+  clear_aborted_region();
 
   _calls                         = 0;
   _elapsed_time_ms               = 0.0;
@@ -3428,20 +3478,32 @@
   assert(_region_finger == NULL,
          "it should be NULL when we're not scanning a region");
 
-  if (!_cm->region_stack_empty()) {
+  if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
     if (_cm->verbose_low())
       gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
                              _task_id, _cm->region_stack_size());
 
-    MemRegion mr = _cm->region_stack_pop_with_lock();
-    // it returns MemRegion() if the pop fails
-    statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
+    MemRegion mr;
+
+    if (!_aborted_region.is_empty()) {
+      mr = _aborted_region;
+      _aborted_region = MemRegion();
+
+      if (_cm->verbose_low())
+        gclog_or_tty->print_cr("[%d] scanning aborted region [ " PTR_FORMAT ", " PTR_FORMAT " )",
+                             _task_id, mr.start(), mr.end());
+    } else {
+      mr = _cm->region_stack_pop_lock_free();
+      // it returns MemRegion() if the pop fails
+      statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
+    }
 
     while (mr.start() != NULL) {
       if (_cm->verbose_medium())
         gclog_or_tty->print_cr("[%d] we are scanning region "
                                "["PTR_FORMAT", "PTR_FORMAT")",
                                _task_id, mr.start(), mr.end());
+
       assert(mr.end() <= _cm->finger(),
              "otherwise the region shouldn't be on the stack");
       assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
@@ -3454,7 +3516,7 @@
         if (has_aborted())
           mr = MemRegion();
         else {
-          mr = _cm->region_stack_pop_with_lock();
+          mr = _cm->region_stack_pop_lock_free();
           // it returns MemRegion() if the pop fails
           statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
         }
@@ -3468,6 +3530,10 @@
         // have definitely set _region_finger to something non-null.
         assert(_region_finger != NULL, "invariant");
 
+        // Make sure that any previously aborted region has been
+        // cleared.
+        assert(_aborted_region.is_empty(), "aborted region not cleared");
+
         // The iteration was actually aborted. So now _region_finger
         // points to the address of the object we last scanned. If we
         // leave it there, when we restart this task, we will rescan
@@ -3480,14 +3546,14 @@
 
         if (!newRegion.is_empty()) {
           if (_cm->verbose_low()) {
-            gclog_or_tty->print_cr("[%d] pushing unscanned region"
-                                   "[" PTR_FORMAT "," PTR_FORMAT ") on region stack",
+            gclog_or_tty->print_cr("[%d] recording unscanned region"
+                                   "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask",
                                    _task_id,
                                    newRegion.start(), newRegion.end());
           }
-          // Now push the part of the region we didn't scan on the
-          // region stack to make sure a task scans it later.
-          _cm->region_stack_push_with_lock(newRegion);
+          // Now record the part of the region we didn't scan to
+          // make sure this task scans it later.
+          _aborted_region = newRegion;
         }
         // break from while
         mr = MemRegion();
@@ -3657,6 +3723,8 @@
 
   assert(concurrent() || _cm->region_stack_empty(),
          "the region stack should have been cleared before remark");
+  assert(concurrent() || !_cm->has_aborted_regions(),
+         "aborted regions should have been cleared before remark");
   assert(_region_finger == NULL,
          "this should be non-null only when a region is being scanned");
 
@@ -3946,6 +4014,7 @@
       // that, if a condition is false, we can immediately find out
       // which one.
       guarantee(_cm->out_of_regions(), "only way to reach here");
+      guarantee(_aborted_region.is_empty(), "only way to reach here");
       guarantee(_cm->region_stack_empty(), "only way to reach here");
       guarantee(_cm->mark_stack_empty(), "only way to reach here");
       guarantee(_task_queue->size() == 0, "only way to reach here");
@@ -4045,7 +4114,8 @@
     _nextMarkBitMap(NULL), _hash_seed(17),
     _task_queue(task_queue),
     _task_queues(task_queues),
-    _oop_closure(NULL) {
+    _oop_closure(NULL),
+    _aborted_region(MemRegion()) {
   guarantee(task_queue != NULL, "invariant");
   guarantee(task_queues != NULL, "invariant");
 
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Wed Sep 08 16:10:51 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Tue Sep 28 09:51:37 2010 -0700
@@ -250,21 +250,23 @@
 
   // This is lock-free; assumes that it will only be called in parallel
   // with other "push" operations (no pops).
-  void push(MemRegion mr);
-
-#if 0
-  // This is currently not used. See the comment in the .cpp file.
+  void push_lock_free(MemRegion mr);
 
   // Lock-free; assumes that it will only be called in parallel
   // with other "pop" operations (no pushes).
-  MemRegion pop();
-#endif // 0
+  MemRegion pop_lock_free();
+
+#if 0
+  // The routines that manipulate the region stack with a lock are
+  // not currently used. They should be retained, however, as a
+  // diagnostic aid.
 
   // These two are the implementations that use a lock. They can be
   // called concurrently with each other but they should not be called
   // concurrently with the lock-free versions (push() / pop()).
   void push_with_lock(MemRegion mr);
   MemRegion pop_with_lock();
+#endif
 
   bool isEmpty()    { return _index == 0; }
   bool isFull()     { return _index == _capacity; }
@@ -398,6 +400,7 @@
   volatile bool           _concurrent;
   // set at the end of a Full GC so that marking aborts
   volatile bool           _has_aborted;
+
   // used when remark aborts due to an overflow to indicate that
   // another concurrent marking phase should start
   volatile bool           _restart_for_overflow;
@@ -548,23 +551,30 @@
   bool mark_stack_overflow()            { return _markStack.overflow(); }
   bool mark_stack_empty()               { return _markStack.isEmpty(); }
 
-  // Manipulation of the region stack
-  bool region_stack_push(MemRegion mr) {
+  // (Lock-free) Manipulation of the region stack
+  bool region_stack_push_lock_free(MemRegion mr) {
     // Currently we only call the lock-free version during evacuation
     // pauses.
     assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
 
-    _regionStack.push(mr);
+    _regionStack.push_lock_free(mr);
     if (_regionStack.overflow()) {
       set_has_overflown();
       return false;
     }
     return true;
   }
+
+  // Lock-free version of region-stack pop. Should only be
+  // called in tandem with other lock-free pops.
+  MemRegion region_stack_pop_lock_free() {
+    return _regionStack.pop_lock_free();
+  }
+
 #if 0
-  // Currently this is not used. See the comment in the .cpp file.
-  MemRegion region_stack_pop() { return _regionStack.pop(); }
-#endif // 0
+  // The routines that manipulate the region stack with a lock are
+  // not currently used. They should be retained, however, as a
+  // diagnostic aid.
 
   bool region_stack_push_with_lock(MemRegion mr) {
     // Currently we only call the lock-based version during either
@@ -579,6 +589,7 @@
     }
     return true;
   }
+
   MemRegion region_stack_pop_with_lock() {
     // Currently we only call the lock-based version during either
     // concurrent marking or remark.
@@ -587,11 +598,21 @@
 
     return _regionStack.pop_with_lock();
   }
+#endif
 
   int region_stack_size()               { return _regionStack.size(); }
   bool region_stack_overflow()          { return _regionStack.overflow(); }
   bool region_stack_empty()             { return _regionStack.isEmpty(); }
 
+  // Iterate over any regions that were aborted while draining the
+  // region stack (any such regions are saved in the corresponding
+  // CMTask) and invalidate (i.e. assign to the empty MemRegion())
+  // any regions that point into the collection set.
+  bool invalidate_aborted_regions_in_cset();
+
+  // Returns true if there are any aborted memory regions.
+  bool has_aborted_regions();
+
   bool concurrent_marking_in_progress() {
     return _concurrent_marking_in_progress;
   }
@@ -856,6 +877,15 @@
   // stack.
   HeapWord*                   _region_finger;
 
+  // If we abort while scanning a region we record the remaining
+  // unscanned portion and check this field when marking restarts.
+  // This avoids having to push on the region stack while other
+  // marking threads may still be popping regions.
+  // If we were to push the unscanned portion directly to the
+  // region stack then we would need to using locking versions
+  // of the push and pop operations.
+  MemRegion                   _aborted_region;
+
   // the number of words this task has scanned
   size_t                      _words_scanned;
   // When _words_scanned reaches this limit, the regular clock is
@@ -1012,6 +1042,15 @@
   void clear_has_aborted()      { _has_aborted = false; }
   bool claimed() { return _claimed; }
 
+  // Support routines for the partially scanned region that may be
+  // recorded as a result of aborting while draining the CMRegionStack
+  MemRegion aborted_region()    { return _aborted_region; }
+  void set_aborted_region(MemRegion mr)
+                                { _aborted_region = mr; }
+
+  // Clears any recorded partially scanned region
+  void clear_aborted_region()   { set_aborted_region(MemRegion()); }
+
   void set_oop_closure(OopClosure* oop_closure) {
     _oop_closure = oop_closure;
   }