8000244: G1: Ergonomically set MarkStackSize and use virtual space for global marking stack
authorjohnc
Mon, 01 Oct 2012 09:28:13 -0700
changeset 14740 1431ef92fed1
parent 14635 6a3c85b8111c
child 14741 6ec8cc27e9c9
8000244: G1: Ergonomically set MarkStackSize and use virtual space for global marking stack Summary: Set the value of MarkStackSize to a value based on the number of parallel marking threads with a reasonable minimum. Expand the marking stack if we have to restart marking due to an overflow up to a reasonable maximum. Allocate the underlying space for the marking stack from virtual memory. Reviewed-by: jmasa, brutisso
hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp
hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
hotspot/src/share/vm/runtime/arguments.cpp
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Thu Nov 29 10:09:04 2012 -0800
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Mon Oct 01 09:28:13 2012 -0700
@@ -46,27 +46,11 @@
 
 // Concurrent marking bit map wrapper
 
-CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
-  _bm((uintptr_t*)NULL,0),
+CMBitMapRO::CMBitMapRO(int shifter) :
+  _bm(),
   _shifter(shifter) {
-  _bmStartWord = (HeapWord*)(rs.base());
-  _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
-  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
-                     (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
-
-  MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
-
-  guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map");
-  // For now we'll just commit all of the bit map up fromt.
-  // Later on we'll try to be more parsimonious with swap.
-  guarantee(_virtual_space.initialize(brs, brs.size()),
-            "couldn't reseve backing store for concurrent marking bit map");
-  assert(_virtual_space.committed_size() == brs.size(),
-         "didn't reserve backing store for all of concurrent marking bit map?");
-  _bm.set_map((uintptr_t*)_virtual_space.low());
-  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
-         _bmWordSize, "inconsistency in bit map sizing");
-  _bm.set_size(_bmWordSize >> _shifter);
+  _bmStartWord = 0;
+  _bmWordSize = 0;
 }
 
 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
@@ -108,15 +92,40 @@
 }
 
 #ifndef PRODUCT
-bool CMBitMapRO::covers(ReservedSpace rs) const {
+bool CMBitMapRO::covers(ReservedSpace heap_rs) const {
   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
          "size inconsistency");
-  return _bmStartWord == (HeapWord*)(rs.base()) &&
-         _bmWordSize  == rs.size()>>LogHeapWordSize;
+  return _bmStartWord == (HeapWord*)(heap_rs.base()) &&
+         _bmWordSize  == heap_rs.size()>>LogHeapWordSize;
 }
 #endif
 
+bool CMBitMap::allocate(ReservedSpace heap_rs) {
+  _bmStartWord = (HeapWord*)(heap_rs.base());
+  _bmWordSize  = heap_rs.size()/HeapWordSize;    // heap_rs.size() is in bytes
+  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
+                     (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
+  if (!brs.is_reserved()) {
+    warning("ConcurrentMark marking bit map allocation failure");
+    return false;
+  }
+  MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
+  // For now we'll just commit all of the bit map up front.
+  // Later on we'll try to be more parsimonious with swap.
+  if (!_virtual_space.initialize(brs, brs.size())) {
+    warning("ConcurrentMark marking bit map backing store failure");
+    return false;
+  }
+  assert(_virtual_space.committed_size() == brs.size(),
+         "didn't reserve backing store for all of concurrent marking bit map?");
+  _bm.set_map((uintptr_t*)_virtual_space.low());
+  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
+         _bmWordSize, "inconsistency in bit map sizing");
+  _bm.set_size(_bmWordSize >> _shifter);
+  return true;
+}
+
 void CMBitMap::clearAll() {
   _bm.clear();
   return;
@@ -163,20 +172,79 @@
 #endif
 {}
 
-void CMMarkStack::allocate(size_t size) {
-  _base = NEW_C_HEAP_ARRAY(oop, size, mtGC);
-  if (_base == NULL) {
-    vm_exit_during_initialization("Failed to allocate CM region mark stack");
+bool CMMarkStack::allocate(size_t capacity) {
+  // allocate a stack of the requisite depth
+  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
+  if (!rs.is_reserved()) {
+    warning("ConcurrentMark MarkStack allocation failure");
+    return false;
   }
-  _index = 0;
-  _capacity = (jint) size;
+  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
+  if (!_virtual_space.initialize(rs, rs.size())) {
+    warning("ConcurrentMark MarkStack backing store failure");
+    // Release the virtual memory reserved for the marking stack
+    rs.release();
+    return false;
+  }
+  assert(_virtual_space.committed_size() == rs.size(),
+         "Didn't reserve backing store for all of ConcurrentMark stack?");
+  _base = (oop*) _virtual_space.low();
+  setEmpty();
+  _capacity = (jint) capacity;
   _saved_index = -1;
   NOT_PRODUCT(_max_depth = 0);
+  return true;
+}
+
+void CMMarkStack::expand() {
+  // Called, during remark, if we've overflown the marking stack during marking.
+  assert(isEmpty(), "stack should been emptied while handling overflow");
+  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
+  // Clear expansion flag
+  _should_expand = false;
+  if (_capacity == (jint) MarkStackSizeMax) {
+    if (PrintGCDetails && Verbose) {
+      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
+    }
+    return;
+  }
+  // Double capacity if possible
+  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
+  // Do not give up existing stack until we have managed to
+  // get the double capacity that we desired.
+  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
+                                                           sizeof(oop)));
+  if (rs.is_reserved()) {
+    // Release the backing store associated with old stack
+    _virtual_space.release();
+    // Reinitialize virtual space for new stack
+    if (!_virtual_space.initialize(rs, rs.size())) {
+      fatal("Not enough swap for expanded marking stack capacity");
+    }
+    _base = (oop*)(_virtual_space.low());
+    _index = 0;
+    _capacity = new_capacity;
+  } else {
+    if (PrintGCDetails && Verbose) {
+      // Failed to double capacity, continue;
+      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
+                          SIZE_FORMAT"K to " SIZE_FORMAT"K",
+                          _capacity / K, new_capacity / K);
+    }
+  }
+}
+
+void CMMarkStack::set_should_expand() {
+  // If we're resetting the marking state because of an
+  // marking stack overflow, record that we should, if
+  // possible, expand the stack.
+  _should_expand = _cm->has_overflown();
 }
 
 CMMarkStack::~CMMarkStack() {
   if (_base != NULL) {
-    FREE_C_HEAP_ARRAY(oop, _base, mtGC);
+    _base = NULL;
+    _virtual_space.release();
   }
 }
 
@@ -217,7 +285,7 @@
     jint res = Atomic::cmpxchg(next_index, &_index, index);
     if (res == index) {
       for (int i = 0; i < n; i++) {
-        int ind = index + i;
+        int  ind = index + i;
         assert(ind < _capacity, "By overflow test above.");
         _base[ind] = ptr_arr[i];
       }
@@ -228,7 +296,6 @@
   }
 }
 
-
 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   jint start = _index;
@@ -244,9 +311,9 @@
     assert(ind < _capacity, "By overflow test above.");
     _base[ind] = ptr_arr[i];
   }
+  NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 }
 
-
 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   jint index = _index;
@@ -255,7 +322,7 @@
     return false;
   } else {
     int k = MIN2(max, index);
-    jint new_ind = index - k;
+    jint  new_ind = index - k;
     for (int j = 0; j < k; j++) {
       ptr_arr[j] = _base[new_ind + j];
     }
@@ -404,9 +471,10 @@
   return MAX2((n_par_threads + 2) / 4, 1U);
 }
 
-ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) :
-  _markBitMap1(rs, MinObjAlignment - 1),
-  _markBitMap2(rs, MinObjAlignment - 1),
+ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) :
+  _g1h(g1h),
+  _markBitMap1(MinObjAlignment - 1),
+  _markBitMap2(MinObjAlignment - 1),
 
   _parallel_marking_threads(0),
   _max_parallel_marking_threads(0),
@@ -415,10 +483,10 @@
   _cleanup_sleep_factor(0.0),
   _cleanup_task_overhead(1.0),
   _cleanup_list("Cleanup List"),
-  _region_bm((BitMap::idx_t) max_regions, false /* in_resource_area*/),
-  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
-           CardTableModRefBS::card_shift,
-           false /* in_resource_area*/),
+  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
+  _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >>
+            CardTableModRefBS::card_shift,
+            false /* in_resource_area*/),
 
   _prevMarkBitMap(&_markBitMap1),
   _nextMarkBitMap(&_markBitMap2),
@@ -449,7 +517,8 @@
   _parallel_workers(NULL),
 
   _count_card_bitmaps(NULL),
-  _count_marked_bytes(NULL) {
+  _count_marked_bytes(NULL),
+  _completed_initialization(false) {
   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
   if (verbose_level < no_verbose) {
     verbose_level = no_verbose;
@@ -464,61 +533,34 @@
                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
   }
 
-  _markStack.allocate(MarkStackSize);
+  if (!_markBitMap1.allocate(heap_rs)) {
+    warning("Failed to allocate first CM bit map");
+    return;
+  }
+  if (!_markBitMap2.allocate(heap_rs)) {
+    warning("Failed to allocate second CM bit map");
+    return;
+  }
 
   // Create & start a ConcurrentMark thread.
   _cmThread = new ConcurrentMarkThread(this);
   assert(cmThread() != NULL, "CM Thread should have been created");
   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 
-  _g1h = G1CollectedHeap::heap();
   assert(CGC_lock != NULL, "Where's the CGC_lock?");
-  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
-  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
+  assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency");
+  assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency");
 
   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
   satb_qs.set_buffer_size(G1SATBBufferSize);
 
   _root_regions.init(_g1h, this);
 
-  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
-  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
-
-  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
-  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
-
-  BitMap::idx_t card_bm_size = _card_bm.size();
-
-  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
-  _active_tasks = _max_worker_id;
-  for (uint i = 0; i < _max_worker_id; ++i) {
-    CMTaskQueue* task_queue = new CMTaskQueue();
-    task_queue->initialize();
-    _task_queues->register_queue(i, task_queue);
-
-    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
-    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, (size_t) max_regions, mtGC);
-
-    _tasks[i] = new CMTask(i, this,
-                           _count_marked_bytes[i],
-                           &_count_card_bitmaps[i],
-                           task_queue, _task_queues);
-
-    _accum_task_vtime[i] = 0.0;
-  }
-
-  // Calculate the card number for the bottom of the heap. Used
-  // in biasing indexes into the accounting card bitmaps.
-  _heap_bottom_card_num =
-    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
-                                CardTableModRefBS::card_shift);
-
-  // Clear all the liveness counting data
-  clear_all_count_data();
-
   if (ConcGCThreads > ParallelGCThreads) {
-    vm_exit_during_initialization("Can't have more ConcGCThreads "
-                                  "than ParallelGCThreads.");
+    warning("Can't have more ConcGCThreads (" UINT32_FORMAT ") "
+            "than ParallelGCThreads (" UINT32_FORMAT ").",
+            ConcGCThreads, ParallelGCThreads);
+    return;
   }
   if (ParallelGCThreads == 0) {
     // if we are not running with any parallel GC threads we will not
@@ -590,9 +632,86 @@
     }
   }
 
+  if (FLAG_IS_DEFAULT(MarkStackSize)) {
+    uintx mark_stack_size =
+      MIN2(MarkStackSizeMax,
+          MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
+    // Verify that the calculated value for MarkStackSize is in range.
+    // It would be nice to use the private utility routine from Arguments.
+    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
+      warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
+              "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
+              mark_stack_size, 1, MarkStackSizeMax);
+      return;
+    }
+    FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
+  } else {
+    // Verify MarkStackSize is in range.
+    if (FLAG_IS_CMDLINE(MarkStackSize)) {
+      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
+        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
+          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
+                  "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
+                  MarkStackSize, 1, MarkStackSizeMax);
+          return;
+        }
+      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
+        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
+          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
+                  " or for MarkStackSizeMax (" UINTX_FORMAT ")",
+                  MarkStackSize, MarkStackSizeMax);
+          return;
+        }
+      }
+    }
+  }
+
+  if (!_markStack.allocate(MarkStackSize)) {
+    warning("Failed to allocate CM marking stack");
+    return;
+  }
+
+  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
+  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
+
+  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
+  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
+
+  BitMap::idx_t card_bm_size = _card_bm.size();
+
+  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
+  _active_tasks = _max_worker_id;
+
+  size_t max_regions = (size_t) _g1h->max_regions();
+  for (uint i = 0; i < _max_worker_id; ++i) {
+    CMTaskQueue* task_queue = new CMTaskQueue();
+    task_queue->initialize();
+    _task_queues->register_queue(i, task_queue);
+
+    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
+    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
+
+    _tasks[i] = new CMTask(i, this,
+                           _count_marked_bytes[i],
+                           &_count_card_bitmaps[i],
+                           task_queue, _task_queues);
+
+    _accum_task_vtime[i] = 0.0;
+  }
+
+  // Calculate the card number for the bottom of the heap. Used
+  // in biasing indexes into the accounting card bitmaps.
+  _heap_bottom_card_num =
+    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
+                                CardTableModRefBS::card_shift);
+
+  // Clear all the liveness counting data
+  clear_all_count_data();
+
   // so that the call below can read a sensible value
-  _heap_start = (HeapWord*) rs.base();
+  _heap_start = (HeapWord*) heap_rs.base();
   set_non_marking_state();
+  _completed_initialization = true;
 }
 
 void ConcurrentMark::update_g1_committed(bool force) {
@@ -1165,6 +1284,11 @@
     assert(!restart_for_overflow(), "sanity");
   }
 
+  // Expand the marking stack, if we have to and if we can.
+  if (_markStack.should_expand()) {
+    _markStack.expand();
+  }
+
   // Reset the marking state if marking completed
   if (!restart_for_overflow()) {
     set_non_marking_state();
@@ -2785,7 +2909,7 @@
     // Verify entries on the task queues
     for (uint i = 0; i < _max_worker_id; i += 1) {
       cl.set_phase(VerifyNoCSetOopsQueues, i);
-      OopTaskQueue* queue = _task_queues->queue(i);
+      CMTaskQueue* queue = _task_queues->queue(i);
       queue->oops_do(&cl);
     }
   }
@@ -2840,8 +2964,8 @@
 #endif // PRODUCT
 
 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
-  _markStack.setEmpty();
-  _markStack.clear_overflow();
+  _markStack.set_should_expand();
+  _markStack.setEmpty();        // Also clears the _markStack overflow flag
   if (clear_overflow) {
     clear_has_overflown();
   } else {
@@ -2850,7 +2974,7 @@
   _finger = _heap_start;
 
   for (uint i = 0; i < _max_worker_id; ++i) {
-    OopTaskQueue* queue = _task_queues->queue(i);
+    CMTaskQueue* queue = _task_queues->queue(i);
     queue->set_empty();
   }
 }
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Thu Nov 29 10:09:04 2012 -0800
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Mon Oct 01 09:28:13 2012 -0700
@@ -63,7 +63,7 @@
 
  public:
   // constructor
-  CMBitMapRO(ReservedSpace rs, int shifter);
+  CMBitMapRO(int shifter);
 
   enum { do_yield = true };
 
@@ -117,8 +117,11 @@
 
  public:
   // constructor
-  CMBitMap(ReservedSpace rs, int shifter) :
-    CMBitMapRO(rs, shifter) {}
+  CMBitMap(int shifter) :
+    CMBitMapRO(shifter) {}
+
+  // Allocates the back store for the marking bitmap
+  bool allocate(ReservedSpace heap_rs);
 
   // write marks
   void mark(HeapWord* addr) {
@@ -155,17 +158,18 @@
   MemRegion getAndClearMarkedRegion(HeapWord* addr, HeapWord* end_addr);
 };
 
-// Represents a marking stack used by the CM collector.
-// Ideally this should be GrowableArray<> just like MSC's marking stack(s).
+// Represents a marking stack used by ConcurrentMarking in the G1 collector.
 class CMMarkStack VALUE_OBJ_CLASS_SPEC {
+  VirtualSpace _virtual_space;   // Underlying backing store for actual stack
   ConcurrentMark* _cm;
   oop*   _base;        // bottom of stack
-  jint   _index;       // one more than last occupied index
-  jint   _capacity;    // max #elements
-  jint   _saved_index; // value of _index saved at start of GC
-  NOT_PRODUCT(jint _max_depth;)  // max depth plumbed during run
+  jint _index;       // one more than last occupied index
+  jint _capacity;    // max #elements
+  jint _saved_index; // value of _index saved at start of GC
+  NOT_PRODUCT(jint _max_depth;)   // max depth plumbed during run
 
-  bool   _overflow;
+  bool  _overflow;
+  bool  _should_expand;
   DEBUG_ONLY(bool _drain_in_progress;)
   DEBUG_ONLY(bool _drain_in_progress_yields;)
 
@@ -173,7 +177,13 @@
   CMMarkStack(ConcurrentMark* cm);
   ~CMMarkStack();
 
-  void allocate(size_t size);
+#ifndef PRODUCT
+  jint max_depth() const {
+    return _max_depth;
+  }
+#endif
+
+  bool allocate(size_t capacity);
 
   oop pop() {
     if (!isEmpty()) {
@@ -231,11 +241,17 @@
 
   bool isEmpty()    { return _index == 0; }
   bool isFull()     { return _index == _capacity; }
-  int maxElems()    { return _capacity; }
+  int  maxElems()   { return _capacity; }
 
   bool overflow() { return _overflow; }
   void clear_overflow() { _overflow = false; }
 
+  bool should_expand() const { return _should_expand; }
+  void set_should_expand();
+
+  // Expand the stack, typically in response to an overflow condition
+  void expand();
+
   int  size() { return _index; }
 
   void setEmpty()   { _index = 0; clear_overflow(); }
@@ -344,6 +360,7 @@
 class ConcurrentMarkThread;
 
 class ConcurrentMark: public CHeapObj<mtGC> {
+  friend class CMMarkStack;
   friend class ConcurrentMarkThread;
   friend class CMTask;
   friend class CMBitMapClosure;
@@ -577,6 +594,9 @@
   // the card bitmaps.
   intptr_t _heap_bottom_card_num;
 
+  // Set to true when initialization is complete
+  bool _completed_initialization;
+
 public:
   // Manipulation of the global mark stack.
   // Notice that the first mark_stack_push is CAS-based, whereas the
@@ -636,7 +656,7 @@
     return _task_queues->steal(worker_id, hash_seed, obj);
   }
 
-  ConcurrentMark(ReservedSpace rs, uint max_regions);
+  ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs);
   ~ConcurrentMark();
 
   ConcurrentMarkThread* cmThread() { return _cmThread; }
@@ -907,6 +927,11 @@
   // Should *not* be called from parallel code.
   inline bool mark_and_count(oop obj);
 
+  // Returns true if initialization was successfully completed.
+  bool completed_initialization() const {
+    return _completed_initialization;
+  }
+
 protected:
   // Clear all the per-task bitmaps and arrays used to store the
   // counting data.
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Thu Nov 29 10:09:04 2012 -0800
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Mon Oct 01 09:28:13 2012 -0700
@@ -2079,7 +2079,11 @@
 
   // Create the ConcurrentMark data structure and thread.
   // (Must do this late, so that "max_regions" is defined.)
-  _cm       = new ConcurrentMark(heap_rs, max_regions());
+  _cm = new ConcurrentMark(this, heap_rs);
+  if (_cm == NULL || !_cm->completed_initialization()) {
+    vm_shutdown_during_initialization("Could not create/initialize ConcurrentMark");
+    return JNI_ENOMEM;
+  }
   _cmThread = _cm->cmThread();
 
   // Initialize the from_card cache structure of HeapRegionRemSet.
@@ -2087,7 +2091,7 @@
 
   // Now expand into the initial heap size.
   if (!expand(init_byte_size)) {
-    vm_exit_during_initialization("Failed to allocate initial heap.");
+    vm_shutdown_during_initialization("Failed to allocate initial heap.");
     return JNI_ENOMEM;
   }
 
--- a/hotspot/src/share/vm/runtime/arguments.cpp	Thu Nov 29 10:09:04 2012 -0800
+++ b/hotspot/src/share/vm/runtime/arguments.cpp	Mon Oct 01 09:28:13 2012 -0700
@@ -1499,13 +1499,12 @@
                      Abstract_VM_Version::parallel_worker_threads());
   }
 
-  if (FLAG_IS_DEFAULT(MarkStackSize)) {
-    FLAG_SET_DEFAULT(MarkStackSize, 128 * TASKQUEUE_SIZE);
-  }
-  if (PrintGCDetails && Verbose) {
-    tty->print_cr("MarkStackSize: %uk  MarkStackSizeMax: %uk",
-      MarkStackSize / K, MarkStackSizeMax / K);
-    tty->print_cr("ConcGCThreads: %u", ConcGCThreads);
+  // MarkStackSize will be set (if it hasn't been set by the user)
+  // when concurrent marking is initialized.
+  // Its value will be based upon the number of parallel marking threads.
+  // But we do set the maximum mark stack size here.
+  if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
+    FLAG_SET_DEFAULT(MarkStackSizeMax, 128 * TASKQUEUE_SIZE);
   }
 
   if (FLAG_IS_DEFAULT(GCTimeRatio) || GCTimeRatio == 0) {
@@ -1517,6 +1516,12 @@
     // is allocation). We might consider increase it further.
     FLAG_SET_DEFAULT(GCTimeRatio, 9);
   }
+
+  if (PrintGCDetails && Verbose) {
+    tty->print_cr("MarkStackSize: %uk  MarkStackSizeMax: %uk",
+      MarkStackSize / K, MarkStackSizeMax / K);
+    tty->print_cr("ConcGCThreads: %u", ConcGCThreads);
+  }
 }
 
 void Arguments::set_heap_size() {
@@ -1980,6 +1985,9 @@
   status = status && verify_min_value(ClassMetaspaceSize, 1*M,
                                       "ClassMetaspaceSize");
 
+  status = status && verify_interval(MarkStackSizeMax,
+                                  1, (max_jint - 1), "MarkStackSizeMax");
+
 #ifdef SPARC
   if (UseConcMarkSweepGC || UseG1GC) {
     // Issue a stern warning if the user has explicitly set