Merge
author brutisso
Thu, 07 Feb 2013 22:04:17 +0100
changeset 15606 8cd2bedf376f
parent 15481 63b99e56827f (current diff)
parent 15604 0209977cd9c4 (diff)
child 15607 b7186055280e
Merge
hotspot/src/share/vm/runtime/thread.cpp
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/oops/ObjectHeap.java	Thu Feb 07 12:23:51 2013 -0800
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/oops/ObjectHeap.java	Thu Feb 07 22:04:17 2013 +0100
@@ -467,7 +467,7 @@
               liveRegions.add(tlab.start());
               liveRegions.add(tlab.start());
               liveRegions.add(tlab.top());
-              liveRegions.add(tlab.end());
+              liveRegions.add(tlab.hardEnd());
             }
           }
         }
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/ThreadLocalAllocBuffer.java	Thu Feb 07 12:23:51 2013 -0800
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/ThreadLocalAllocBuffer.java	Thu Feb 07 22:04:17 2013 +0100
@@ -27,6 +27,7 @@
 import java.io.*;
 import java.util.*;
 import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.oops.*;
 import sun.jvm.hotspot.types.*;
 
 /** <P> ThreadLocalAllocBuffer: a descriptor for thread-local storage
@@ -62,9 +63,22 @@
     super(addr);
   }
 
-  public Address start()                        { return startField.getValue(addr); }
-  public Address end()                          { return   endField.getValue(addr); }
-  public Address top()                          { return   topField.getValue(addr); }
+  public Address start()    { return startField.getValue(addr); }
+  public Address end()      { return   endField.getValue(addr); }
+  public Address top()      { return   topField.getValue(addr); }
+  public Address hardEnd()  { return end().addOffsetTo(alignmentReserve()); }
+
+  private long alignmentReserve() {
+    return Oop.alignObjectSize(endReserve());
+  }
+
+  private long endReserve() {
+    long minFillerArraySize = Array.baseOffsetInBytes(BasicType.T_INT);
+    long reserveForAllocationPrefetch = VM.getVM().getReserveForAllocationPrefetch();
+    long heapWordSize = VM.getVM().getHeapWordSize();
+
+    return Math.max(minFillerArraySize, reserveForAllocationPrefetch * heapWordSize);
+  }
 
   /** Support for iteration over heap -- not sure how this will
       interact with GC in reflective system, but necessary for the
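The hardEnd() addition above is pure address arithmetic: the reserve beyond end() is the larger of a minimal int-array filler and the allocation-prefetch distance, rounded up to the object alignment. Below is a minimal, self-contained Java sketch of that computation; the constants (8-byte heap words, a 16-byte int[] base offset, a 2-word prefetch reserve, 8-byte object alignment) are illustrative assumptions, not values read from a debuggee.

```java
// Standalone sketch (not SA code) of how hardEnd() is derived from end().
public class TlabHardEndSketch {
    static final long HEAP_WORD_SIZE = 8;             // assumed: 64-bit heap words
    static final long MIN_FILLER_ARRAY_SIZE = 16;     // assumed int[] base offset in bytes
    static final long RESERVE_FOR_ALLOC_PREFETCH = 2; // assumed _reserve_for_allocation_prefetch

    // Stands in for Oop.alignObjectSize(): round up to the object alignment (8 bytes here).
    static long alignObjectSize(long bytes) {
        return (bytes + 7) & ~7L;
    }

    // Mirrors endReserve()/alignmentReserve() in the hunk above.
    static long alignmentReserve() {
        long endReserve = Math.max(MIN_FILLER_ARRAY_SIZE,
                                   RESERVE_FOR_ALLOC_PREFETCH * HEAP_WORD_SIZE);
        return alignObjectSize(endReserve);
    }

    public static void main(String[] args) {
        long end = 0x7f0000100000L;                   // assumed TLAB end address
        long hardEnd = end + alignmentReserve();
        // With these numbers: reserve = max(16, 2 * 8) = 16 bytes, so hardEnd = end + 16.
        System.out.printf("end=0x%x hardEnd=0x%x reserve=%d bytes%n",
                          end, hardEnd, alignmentReserve());
    }
}
```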
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java	Thu Feb 07 12:23:51 2013 -0800
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java	Thu Feb 07 22:04:17 2013 +0100
@@ -114,6 +114,7 @@
   private int          invalidOSREntryBCI;
   private ReversePtrs  revPtrs;
   private VMRegImpl    vmregImpl;
+  private int          reserveForAllocationPrefetch;
 
   // System.getProperties from debuggee VM
   private Properties   sysProps;
@@ -293,6 +294,10 @@
        vmRelease = CStringUtilities.getString(releaseAddr);
        Address vmInternalInfoAddr = vmVersion.getAddressField("_s_internal_vm_info_string").getValue();
        vmInternalInfo = CStringUtilities.getString(vmInternalInfoAddr);
+
+       CIntegerType intType = (CIntegerType) db.lookupType("int");
+       CIntegerField reserveForAllocationPrefetchField = vmVersion.getCIntegerField("_reserve_for_allocation_prefetch");
+       reserveForAllocationPrefetch = (int)reserveForAllocationPrefetchField.getCInteger(intType);
     } catch (Exception exp) {
        throw new RuntimeException("can't determine target's VM version : " + exp.getMessage());
     }
@@ -778,6 +783,10 @@
     return vmInternalInfo;
   }
 
+  public int getReserveForAllocationPrefetch() {
+    return reserveForAllocationPrefetch;
+  }
+
   public boolean isSharingEnabled() {
     if (sharingEnabled == null) {
       Flag flag = getCommandLineFlag("UseSharedSpaces");
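The VM.java hunk reads the new static field through the SA type database. At bottom, a CIntegerField read is "fetch sizeof(int) bytes at the field's address in the debuggee and decode them in the target's byte order". The sketch below models just that step with a local buffer standing in for remote memory; the offset and value are made up for illustration.

```java
import java.nio.ByteBuffer;
import java.nio.ByteOrder;

// Models what a getCInteger() call boils down to; the byte array stands in
// for debuggee memory and the field offset is a made-up example value.
public class CIntFieldReadSketch {
    public static void main(String[] args) {
        byte[] debuggeeMemory = new byte[64];
        int fieldOffset = 24;   // assumed offset of _reserve_for_allocation_prefetch
        ByteBuffer bb = ByteBuffer.wrap(debuggeeMemory).order(ByteOrder.LITTLE_ENDIAN);
        bb.putInt(fieldOffset, 2);   // pretend the target VM wrote 2 there

        int reserveForAllocationPrefetch = bb.getInt(fieldOffset);
        System.out.println("reserveForAllocationPrefetch = " + reserveForAllocationPrefetch);
    }
}
```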
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Thu Feb 07 12:23:51 2013 -0800
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Thu Feb 07 22:04:17 2013 +0100
@@ -571,19 +571,14 @@
     _sleep_factor             =     0.0;
     _marking_task_overhead    =     1.0;
   } else {
-    if (ConcGCThreads > 0) {
-      // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
+    if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
+      // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
       // if both are set
-
-      _parallel_marking_threads = (uint) ConcGCThreads;
-      _max_parallel_marking_threads = _parallel_marking_threads;
       _sleep_factor             = 0.0;
       _marking_task_overhead    = 1.0;
     } else if (G1MarkingOverheadPercent > 0) {
-      // we will calculate the number of parallel marking threads
-      // based on a target overhead with respect to the soft real-time
-      // goal
-
+      // We will calculate the number of parallel marking threads based
+      // on a target overhead with respect to the soft real-time goal
       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
       double overall_cm_overhead =
         (double) MaxGCPauseMillis * marking_overhead /
@@ -596,17 +591,22 @@
       double sleep_factor =
                          (1.0 - marking_task_overhead) / marking_task_overhead;
 
-      _parallel_marking_threads = (uint) marking_thread_num;
-      _max_parallel_marking_threads = _parallel_marking_threads;
+      FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
       _sleep_factor             = sleep_factor;
       _marking_task_overhead    = marking_task_overhead;
     } else {
-      _parallel_marking_threads = scale_parallel_threads((uint)ParallelGCThreads);
-      _max_parallel_marking_threads = _parallel_marking_threads;
+      // Calculate the number of parallel marking threads by scaling
+      // the number of parallel GC threads.
+      uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
+      FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
       _sleep_factor             = 0.0;
       _marking_task_overhead    = 1.0;
     }
 
+    assert(ConcGCThreads > 0, "Should have been set");
+    _parallel_marking_threads = (uint) ConcGCThreads;
+    _max_parallel_marking_threads = _parallel_marking_threads;
+
     if (parallel_marking_threads() > 1) {
       _cleanup_task_overhead = 1.0;
     } else {
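The net effect of this hunk: ConcGCThreads always ends up holding the chosen marking thread count (set ergonomically via FLAG_SET_ERGO when the user did not set it), so _parallel_marking_threads can be assigned once after the if/else chain. A hedged Java model of the selection logic follows; the (n + 2) / 4 scaling mirrors what scale_parallel_threads() does here but should be treated as an assumption, and the G1MarkingOverheadPercent branch is omitted since the hunk only shows part of that formula.

```java
// Hedged model of the marking-thread-count selection (the real code is C++
// in ConcurrentMark's constructor).
public class ConcMarkingThreadsSketch {
    // Assumed to mirror scale_parallel_threads().
    static int scaleParallelThreads(int parallelGCThreads) {
        return Math.max((parallelGCThreads + 2) / 4, 1);
    }

    static int chooseConcGCThreads(boolean concGCThreadsOnCmdLine,
                                   int concGCThreads,
                                   int parallelGCThreads) {
        if (concGCThreadsOnCmdLine && concGCThreads > 0) {
            return concGCThreads;   // an explicit -XX:ConcGCThreads wins
        }
        // (G1MarkingOverheadPercent branch elided; see the hunk above.)
        return scaleParallelThreads(parallelGCThreads);
    }

    public static void main(String[] args) {
        // Default ergonomics: 8 parallel GC threads -> (8 + 2) / 4 = 2 marking threads.
        System.out.println(chooseConcGCThreads(false, 0, 8)); // 2
        // -XX:ConcGCThreads=4 takes precedence.
        System.out.println(chooseConcGCThreads(true, 4, 8));  // 4
    }
}
```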
@@ -1190,7 +1190,7 @@
     uint active_workers = MAX2(1U, parallel_marking_threads());
 
     CMRootRegionScanTask task(this);
-    if (parallel_marking_threads() > 0) {
+    if (use_parallel_marking_threads()) {
       _parallel_workers->set_active_workers((int) active_workers);
       _parallel_workers->run_task(&task);
     } else {
@@ -1226,7 +1226,7 @@
   set_phase(active_workers, true /* concurrent */);
 
   CMConcurrentMarkingTask markingTask(this, cmThread());
-  if (parallel_marking_threads() > 0) {
+  if (use_parallel_marking_threads()) {
     _parallel_workers->set_active_workers((int)active_workers);
     // Don't set _n_par_threads because it affects MT in process_strong_roots()
     // and the decisions on that MT processing are made elsewhere.
@@ -2167,7 +2167,8 @@
   assert(tmp_free_list.is_empty(), "post-condition");
 }
 
-// Support closures for reference procssing in G1
+// Supporting Object and Oop closures for reference discovery
+// and processing during marking
 
 bool G1CMIsAliveClosure::do_object_b(oop obj) {
   HeapWord* addr = (HeapWord*)obj;
@@ -2175,73 +2176,26 @@
          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
 }
 
-class G1CMKeepAliveClosure: public ExtendedOopClosure {
-  G1CollectedHeap* _g1;
-  ConcurrentMark*  _cm;
- public:
-  G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) :
-    _g1(g1), _cm(cm) {
-    assert(Thread::current()->is_VM_thread(), "otherwise fix worker id");
-  }
-
-  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
-  virtual void do_oop(      oop* p) { do_oop_work(p); }
-
-  template <class T> void do_oop_work(T* p) {
-    oop obj = oopDesc::load_decode_heap_oop(p);
-    HeapWord* addr = (HeapWord*)obj;
-
-    if (_cm->verbose_high()) {
-      gclog_or_tty->print_cr("\t[0] we're looking at location "
-                             "*"PTR_FORMAT" = "PTR_FORMAT,
-                             p, (void*) obj);
-    }
-
-    if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
-      _cm->mark_and_count(obj);
-      _cm->mark_stack_push(obj);
-    }
-  }
-};
-
-class G1CMDrainMarkingStackClosure: public VoidClosure {
-  ConcurrentMark*               _cm;
-  CMMarkStack*                  _markStack;
-  G1CMKeepAliveClosure*         _oopClosure;
- public:
-  G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack,
-                               G1CMKeepAliveClosure* oopClosure) :
-    _cm(cm),
-    _markStack(markStack),
-    _oopClosure(oopClosure) { }
-
-  void do_void() {
-    _markStack->drain(_oopClosure, _cm->nextMarkBitMap(), false);
-  }
-};
-
-// 'Keep Alive' closure used by parallel reference processing.
-// An instance of this closure is used in the parallel reference processing
-// code rather than an instance of G1CMKeepAliveClosure. We could have used
-// the G1CMKeepAliveClosure as it is MT-safe. Also reference objects are
-// placed on to discovered ref lists once so we can mark and push with no
-// need to check whether the object has already been marked. Using the
-// G1CMKeepAliveClosure would mean, however, having all the worker threads
-// operating on the global mark stack. This means that an individual
-// worker would be doing lock-free pushes while it processes its own
-// discovered ref list followed by drain call. If the discovered ref lists
-// are unbalanced then this could cause interference with the other
-// workers. Using a CMTask (and its embedded local data structures)
-// avoids that potential interference.
-class G1CMParKeepAliveAndDrainClosure: public OopClosure {
+// 'Keep Alive' oop closure used by both serial and parallel reference processing.
+// Uses the CMTask associated with a worker thread (for serial reference
+// processing the CMTask for worker 0 is used) to preserve (mark) and
+// trace referent objects.
+//
+// Using the CMTask and embedded local queues avoids having the worker
+// threads operating on the global mark stack. This reduces the risk
+// of overflowing the stack - which we would rather avoid at this late
+// stage. Using the tasks' local queues also removes the potential
+// for the workers to interfere with each other, which could occur if
+// they were operating on the global stack.
+
+class G1CMKeepAliveAndDrainClosure: public OopClosure {
   ConcurrentMark*  _cm;
   CMTask*          _task;
   int              _ref_counter_limit;
   int              _ref_counter;
  public:
-  G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
-    _cm(cm), _task(task),
-    _ref_counter_limit(G1RefProcDrainInterval) {
+  G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
+    _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval) {
     assert(_ref_counter_limit > 0, "sanity");
     _ref_counter = _ref_counter_limit;
   }
@@ -2262,18 +2216,22 @@
       _ref_counter--;
 
       if (_ref_counter == 0) {
-        // We have dealt with _ref_counter_limit references, pushing them and objects
-        // reachable from them on to the local stack (and possibly the global stack).
-        // Call do_marking_step() to process these entries. We call the routine in a
-        // loop, which we'll exit if there's nothing more to do (i.e. we're done
-        // with the entries that we've pushed as a result of the deal_with_reference
-        // calls above) or we overflow.
-        // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
-        // while there may still be some work to do. (See the comment at the
-        // beginning of CMTask::do_marking_step() for those conditions - one of which
-        // is reaching the specified time target.) It is only when
-        // CMTask::do_marking_step() returns without setting the has_aborted() flag
-        // that the marking has completed.
+        // We have dealt with _ref_counter_limit references, pushing them
+        // and objects reachable from them on to the local stack (and
+        // possibly the global stack). Call CMTask::do_marking_step() to
+        // process these entries.
+        //
+        // We call CMTask::do_marking_step() in a loop, which we'll exit if
+        // there's nothing more to do (i.e. we're done with the entries that
+        // were pushed as a result of the CMTask::deal_with_reference() calls
+        // above) or we overflow.
+        //
+        // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
+        // flag while there may still be some work to do. (See the comment at
+        // the beginning of CMTask::do_marking_step() for those conditions -
+        // one of which is reaching the specified time target.) It is only
+        // when CMTask::do_marking_step() returns without setting the
+        // has_aborted() flag that the marking step has completed.
         do {
           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
           _task->do_marking_step(mark_step_duration_ms,
@@ -2290,36 +2248,59 @@
   }
 };
 
-class G1CMParDrainMarkingStackClosure: public VoidClosure {
+// 'Drain' oop closure used by both serial and parallel reference processing.
+// Uses the CMTask associated with a given worker thread (for serial
+// reference processing the CMTask for worker 0 is used). Calls the
+// do_marking_step routine, with an unbelievably large timeout value,
+// to drain the marking data structures of the remaining entries
+// added by the 'keep alive' oop closure above.
+
+class G1CMDrainMarkingStackClosure: public VoidClosure {
   ConcurrentMark* _cm;
-  CMTask* _task;
+  CMTask*         _task;
+  bool            _do_stealing;
+  bool            _do_termination;
  public:
-  G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
-    _cm(cm), _task(task) { }
+  G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_par) :
+    _cm(cm), _task(task) {
+    assert(is_par || _task->worker_id() == 0,
+           "Only task for worker 0 should be used if ref processing is single threaded");
+    // We only allow stealing and only enter the termination protocol
+    // in CMTask::do_marking_step() if this closure is being instantiated
+    // for parallel reference processing.
+    _do_stealing = _do_termination = is_par;
+  }
 
   void do_void() {
     do {
       if (_cm->verbose_high()) {
-        gclog_or_tty->print_cr("\t[%u] Drain: Calling do marking_step",
-                               _task->worker_id());
+        gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - "
+                               "stealing: %s, termination: %s",
+                               _task->worker_id(),
+                               BOOL_TO_STR(_do_stealing),
+                               BOOL_TO_STR(_do_termination));
       }
 
-      // We call CMTask::do_marking_step() to completely drain the local and
-      // global marking stacks. The routine is called in a loop, which we'll
-      // exit if there's nothing more to do (i.e. we'completely drained the
-      // entries that were pushed as a result of applying the
-      // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
-      // lists above) or we overflow the global marking stack.
-      // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
-      // while there may still be some work to do. (See the comment at the
-      // beginning of CMTask::do_marking_step() for those conditions - one of which
-      // is reaching the specified time target.) It is only when
-      // CMTask::do_marking_step() returns without setting the has_aborted() flag
-      // that the marking has completed.
+      // We call CMTask::do_marking_step() to completely drain the local
+      // and global marking stacks of entries pushed by the 'keep alive'
+      // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
+      //
+      // CMTask::do_marking_step() is called in a loop, which we'll exit
+      // if there's nothing more to do (i.e. we've completely drained the
+      // entries that were pushed as a result of applying the 'keep alive'
+      // closure to the entries on the discovered ref lists) or we overflow
+      // the global marking stack.
+      //
+      // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
+      // flag while there may still be some work to do. (See the comment at
+      // the beginning of CMTask::do_marking_step() for those conditions -
+      // one of which is reaching the specified time target.) It is only
+      // when CMTask::do_marking_step() returns without setting the
+      // has_aborted() flag that the marking step has completed.
 
       _task->do_marking_step(1000000000.0 /* something very large */,
-                             true /* do_stealing    */,
-                             true /* do_termination */);
+                             _do_stealing,
+                             _do_termination);
     } while (_task->has_aborted() && !_cm->has_overflown());
   }
 };
@@ -2352,19 +2333,23 @@
   ProcessTask&     _proc_task;
   G1CollectedHeap* _g1h;
   ConcurrentMark*  _cm;
+  bool             _processing_is_mt;
 
 public:
   G1CMRefProcTaskProxy(ProcessTask& proc_task,
                      G1CollectedHeap* g1h,
                      ConcurrentMark* cm) :
     AbstractGangTask("Process reference objects in parallel"),
-    _proc_task(proc_task), _g1h(g1h), _cm(cm) { }
+    _proc_task(proc_task), _g1h(g1h), _cm(cm) {
+      ReferenceProcessor* rp = _g1h->ref_processor_cm();
+      _processing_is_mt = rp->processing_is_mt();
+    }
 
   virtual void work(uint worker_id) {
     CMTask* marking_task = _cm->task(worker_id);
     G1CMIsAliveClosure g1_is_alive(_g1h);
-    G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);
-    G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
+    G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);
+    G1CMDrainMarkingStackClosure g1_par_drain(_cm, marking_task, _processing_is_mt);
 
     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
   }
@@ -2372,6 +2357,7 @@
 
 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
   assert(_workers != NULL, "Need parallel worker threads.");
+  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
 
   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
 
@@ -2399,6 +2385,7 @@
 
 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
   assert(_workers != NULL, "Need parallel worker threads.");
+  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
 
   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
 
@@ -2429,59 +2416,58 @@
     // See the comment in G1CollectedHeap::ref_processing_init()
     // about how reference processing currently works in G1.
 
-    // Process weak references.
+    // Set the soft reference policy
     rp->setup_policy(clear_all_soft_refs);
     assert(_markStack.isEmpty(), "mark stack should be empty");
 
-    G1CMKeepAliveClosure g1_keep_alive(g1h, this);
-    G1CMDrainMarkingStackClosure
-      g1_drain_mark_stack(this, &_markStack, &g1_keep_alive);
-
-    // We use the work gang from the G1CollectedHeap and we utilize all
-    // the worker threads.
-    uint active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1U;
+    // Non-MT instances of the 'Keep Alive' and 'Complete GC' oop closures.
+    G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0));
+    G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), false);
+
+    // We need at least one active thread. If reference processing is
+    // not multi-threaded we use the current (ConcurrentMarkThread) thread,
+    // otherwise we use the work gang from the G1CollectedHeap and we
+    // utilize all the worker threads we can.
+    uint active_workers = (rp->processing_is_mt() && g1h->workers() != NULL
+                                ? g1h->workers()->active_workers()
+                                : 1U);
+
     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
 
     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
                                               g1h->workers(), active_workers);
 
-    if (rp->processing_is_mt()) {
-      // Set the degree of MT here.  If the discovery is done MT, there
-      // may have been a different number of threads doing the discovery
-      // and a different number of discovered lists may have Ref objects.
-      // That is OK as long as the Reference lists are balanced (see
-      // balance_all_queues() and balance_queues()).
-      rp->set_active_mt_degree(active_workers);
-
-      rp->process_discovered_references(&g1_is_alive,
+    AbstractRefProcTaskExecutor* executor = (rp->processing_is_mt()
+                                                ? &par_task_executor
+                                                : NULL);
+
+    // Set the degree of MT processing here.  If the discovery was done MT,
+    // the number of threads involved during discovery could differ from
+    // the number of active workers.  This is OK as long as the discovered
+    // Reference lists are balanced (see balance_all_queues() and balance_queues()).
+    rp->set_active_mt_degree(active_workers);
+
+    // Process the weak references.
+    rp->process_discovered_references(&g1_is_alive,
                                       &g1_keep_alive,
                                       &g1_drain_mark_stack,
-                                      &par_task_executor);
-
-      // The work routines of the parallel keep_alive and drain_marking_stack
-      // will set the has_overflown flag if we overflow the global marking
-      // stack.
-    } else {
-      rp->process_discovered_references(&g1_is_alive,
-                                        &g1_keep_alive,
-                                        &g1_drain_mark_stack,
-                                        NULL);
-    }
+                                      executor);
+
+    // The do_oop work routines of the keep_alive and drain_marking_stack
+    // oop closures will set the has_overflown flag if we overflow the
+    // global marking stack.
 
     assert(_markStack.overflow() || _markStack.isEmpty(),
             "mark stack should be empty (unless it overflowed)");
     if (_markStack.overflow()) {
-      // Should have been done already when we tried to push an
+      // This should have been done already when we tried to push an
       // entry on to the global mark stack. But let's do it again.
       set_has_overflown();
     }
 
-    if (rp->processing_is_mt()) {
-      assert(rp->num_q() == active_workers, "why not");
-      rp->enqueue_discovered_references(&par_task_executor);
-    } else {
-      rp->enqueue_discovered_references();
-    }
+    assert(rp->num_q() == active_workers, "why not");
+
+    rp->enqueue_discovered_references(executor);
 
     rp->verify_no_references_recorded();
     assert(!rp->discovery_enabled(), "Post condition");
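The shape of this refactoring is worth noting: instead of duplicating the process/enqueue calls inside an if (rp->processing_is_mt()) / else, a single call site receives either a real task executor or NULL, and the serial path is chosen inside the reference processor. A small Java sketch of the same pattern, with illustrative names:

```java
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

// One call site for both modes: a null executor selects the serial path.
public class RefProcExecutorSketch {
    interface TaskExecutor { void execute(Runnable task); }

    static void processDiscoveredReferences(List<Runnable> tasks, TaskExecutor executor) {
        for (Runnable task : tasks) {
            if (executor != null) {
                executor.execute(task);   // parallel path
            } else {
                task.run();               // serial path, same call site
            }
        }
    }

    public static void main(String[] args) throws InterruptedException {
        ExecutorService pool = Executors.newFixedThreadPool(2);
        TaskExecutor mt = pool::submit;
        List<Runnable> tasks = List.of(() -> System.out.println("keep alive"),
                                       () -> System.out.println("drain"));
        boolean processingIsMT = true;   // flip to false for the serial path
        processDiscoveredReferences(tasks, processingIsMT ? mt : null);
        pool.shutdown();
        pool.awaitTermination(5, TimeUnit.SECONDS);
    }
}
```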
@@ -3242,7 +3228,9 @@
 }
 
 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
-  _parallel_workers->print_worker_threads_on(st);
+  if (use_parallel_marking_threads()) {
+    _parallel_workers->print_worker_threads_on(st);
+  }
 }
 
 // We take a break if someone is trying to stop the world.
@@ -4078,11 +4066,23 @@
                                _worker_id, _finger, _region_limit, _curr_region);
       }
 
-      // Let's iterate over the bitmap of the part of the
-      // region that is left.
-      if (mr.is_empty() || _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
-        // We successfully completed iterating over the region. Now,
-        // let's give up the region.
+      HeapRegion* hr = _g1h->heap_region_containing(mr.start());
+      assert(!hr->isHumongous() || mr.start() == hr->bottom(),
+             "the start of HeapRegion and MemRegion should be consistent for humongous regions");
+
+      // Avoid (wastefully) iterating over the bitmap of a humongous region
+      // whose first bit is NOT marked. Note that the alternative case of a
+      // humongous region with its first bit marked is handled properly in
+      // the iterate() routine.
+      // Otherwise, let's iterate over the bitmap of the part of the region
+      // that is left.
+      // If the iteration is successful, give up the region.
+      // Also note that a humongous region whose first bit is NOT marked is
+      // considered "successful", leveraging the fact that the entire bitmap
+      // consists of all 0's in that case.
+      if (mr.is_empty() ||
+          (hr != NULL && hr->isHumongous() && !_nextMarkBitMap->isMarked(mr.start())) ||
+          _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
         giveup_current_region();
         regular_clock_call();
       } else {
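The new guard relies on a property of humongous regions: the object starts at the region's bottom(), so either the first bit of the mark bitmap is set or the whole bitmap is zero. "First bit clear" is therefore equivalent to "nothing to visit", and the short-circuit never calls iterate() in that case. A minimal Java model (names illustrative):

```java
import java.util.function.BooleanSupplier;

// Models the three-way short-circuit in the condition above.
public class HumongousScanSkipSketch {
    static boolean regionScanDone(boolean mrIsEmpty,
                                  boolean isHumongous,
                                  boolean firstBitMarked,
                                  BooleanSupplier iterateBitmap) {
        return mrIsEmpty
            || (isHumongous && !firstBitMarked)  // all-zero bitmap: nothing to visit
            || iterateBitmap.getAsBoolean();     // otherwise walk the bitmap
    }

    public static void main(String[] args) {
        // Humongous region with its first bit clear: iterate() is never invoked.
        boolean done = regionScanDone(false, true, false, () -> {
            throw new AssertionError("should have been skipped");
        });
        System.out.println(done);  // true -> the task gives up the region
    }
}
```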
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Thu Feb 07 12:23:51 2013 -0800
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Thu Feb 07 22:04:17 2013 +0100
@@ -371,8 +371,8 @@
   friend class CalcLiveObjectsClosure;
   friend class G1CMRefProcTaskProxy;
   friend class G1CMRefProcTaskExecutor;
-  friend class G1CMParKeepAliveAndDrainClosure;
-  friend class G1CMParDrainMarkingStackClosure;
+  friend class G1CMKeepAliveAndDrainClosure;
+  friend class G1CMDrainMarkingStackClosure;
 
 protected:
   ConcurrentMarkThread* _cmThread;   // the thread doing the work
@@ -499,17 +499,26 @@
   }
 
   // accessor methods
-  uint parallel_marking_threads() { return _parallel_marking_threads; }
-  uint max_parallel_marking_threads() { return _max_parallel_marking_threads;}
-  double sleep_factor()             { return _sleep_factor; }
-  double marking_task_overhead()    { return _marking_task_overhead;}
-  double cleanup_sleep_factor()     { return _cleanup_sleep_factor; }
-  double cleanup_task_overhead()    { return _cleanup_task_overhead;}
+  uint parallel_marking_threads() const     { return _parallel_marking_threads; }
+  uint max_parallel_marking_threads() const { return _max_parallel_marking_threads;}
+  double sleep_factor()                     { return _sleep_factor; }
+  double marking_task_overhead()            { return _marking_task_overhead;}
+  double cleanup_sleep_factor()             { return _cleanup_sleep_factor; }
+  double cleanup_task_overhead()            { return _cleanup_task_overhead;}
 
-  HeapWord*               finger()        { return _finger;   }
-  bool                    concurrent()    { return _concurrent; }
-  uint                    active_tasks()  { return _active_tasks; }
-  ParallelTaskTerminator* terminator()    { return &_terminator; }
+  bool use_parallel_marking_threads() const {
+    assert(parallel_marking_threads() <=
+           max_parallel_marking_threads(), "sanity");
+    assert((_parallel_workers == NULL && parallel_marking_threads() == 0) ||
+           parallel_marking_threads() > 0,
+           "parallel workers not set up correctly");
+    return _parallel_workers != NULL;
+  }
+
+  HeapWord*               finger()          { return _finger;   }
+  bool                    concurrent()      { return _concurrent; }
+  uint                    active_tasks()    { return _active_tasks; }
+  ParallelTaskTerminator* terminator()      { return &_terminator; }
 
   // It claims the next available region to be scanned by a marking
   // task/thread. It might return NULL if the next region is empty or
--- a/hotspot/src/share/vm/memory/collectorPolicy.cpp	Thu Feb 07 12:23:51 2013 -0800
+++ b/hotspot/src/share/vm/memory/collectorPolicy.cpp	Thu Feb 07 22:04:17 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -235,6 +235,18 @@
   if (NewSize + OldSize > MaxHeapSize) {
     MaxHeapSize = NewSize + OldSize;
   }
+
+  if (FLAG_IS_CMDLINE(OldSize) && FLAG_IS_DEFAULT(NewSize)) {
+    // NewRatio will be used later to set the young generation size so we use
+    // it to calculate how big the heap should be based on the requested OldSize
+    // and NewRatio.
+    assert(NewRatio > 0, "NewRatio should have been set up earlier");
+    size_t calculated_heapsize = (OldSize / NewRatio) * (NewRatio + 1);
+
+    calculated_heapsize = align_size_up(calculated_heapsize, max_alignment());
+    MaxHeapSize = calculated_heapsize;
+    InitialHeapSize = calculated_heapsize;
+  }
   MaxHeapSize = align_size_up(MaxHeapSize, max_alignment());
 
   always_do_update_barrier = UseConcMarkSweepGC;
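To see the new sizing rule in numbers: with -XX:OldSize=1g and the default NewRatio of 2, the old generation should be NewRatio/(NewRatio+1) of the heap, so the heap is grown to 1.5 GB instead of staying at a default that cannot accommodate the requested OldSize. A worked Java sketch follows; the 2 MB max_alignment is an assumption for illustration.

```java
// Worked arithmetic for the FLAG_IS_CMDLINE(OldSize) branch above.
public class HeapFromOldSizeSketch {
    static long alignUp(long size, long alignment) {
        return (size + alignment - 1) / alignment * alignment;
    }

    public static void main(String[] args) {
        long oldSize  = 1024L * 1024 * 1024;  // -XX:OldSize=1g
        long newRatio = 2;                    // old gen : young gen = NewRatio : 1

        long calculatedHeapSize = (oldSize / newRatio) * (newRatio + 1);
        calculatedHeapSize = alignUp(calculatedHeapSize, 2L * 1024 * 1024);

        // young = heap - old = oldSize / newRatio, so old:young = 2:1 as requested.
        System.out.printf("heap=%d MB young=%d MB old=%d MB%n",
                          calculatedHeapSize >> 20,
                          (calculatedHeapSize - oldSize) >> 20,
                          oldSize >> 20);
    }
}
```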
@@ -384,14 +396,15 @@
 // keeping it simple also seems a worthwhile goal.
 bool TwoGenerationCollectorPolicy::adjust_gen0_sizes(size_t* gen0_size_ptr,
                                                      size_t* gen1_size_ptr,
-                                                     size_t heap_size,
-                                                     size_t min_gen0_size) {
+                                                     const size_t heap_size,
+                                                     const size_t min_gen1_size) {
   bool result = false;
+
   if ((*gen1_size_ptr + *gen0_size_ptr) > heap_size) {
-    if (((*gen0_size_ptr + OldSize) > heap_size) &&
-       (heap_size - min_gen0_size) >= min_alignment()) {
-      // Adjust gen0 down to accomodate OldSize
-      *gen0_size_ptr = heap_size - min_gen0_size;
+    if ((heap_size < (*gen0_size_ptr + min_gen1_size)) &&
+        (heap_size >= min_gen1_size + min_alignment())) {
+      // Adjust gen0 down to accommodate min_gen1_size
+      *gen0_size_ptr = heap_size - min_gen1_size;
       *gen0_size_ptr =
         MAX2((uintx)align_size_down(*gen0_size_ptr, min_alignment()),
              min_alignment());
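A worked example of the corrected adjustment: the parameter previously named min_gen0_size actually carried the minimum old-generation size, and the old condition compared against OldSize directly. With the rename and the new condition, gen0 is shrunk so that min_gen1_size still fits. The sketch below models the inner branch only, with an assumed 64 KB min_alignment.

```java
// Models the inner branch of adjust_gen0_sizes() after the fix.
public class AdjustGen0Sketch {
    static final long MIN_ALIGNMENT = 64 * 1024;  // assumed min_alignment()

    static long alignDown(long size, long alignment) {
        return size / alignment * alignment;
    }

    public static void main(String[] args) {
        long heapSize    = 96L * 1024 * 1024;  // 96 MB total
        long gen0Size    = 80L * 1024 * 1024;  // requested young gen
        long minGen1Size = 32L * 1024 * 1024;  // minimum old gen

        if (heapSize < gen0Size + minGen1Size
            && heapSize >= minGen1Size + MIN_ALIGNMENT) {
            // Cut gen0 back so the minimum old generation still fits.
            gen0Size = Math.max(alignDown(heapSize - minGen1Size, MIN_ALIGNMENT),
                                MIN_ALIGNMENT);
        }
        System.out.println("gen0 = " + (gen0Size >> 20) + " MB");  // 96 - 32 = 64 MB
    }
}
```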
--- a/hotspot/src/share/vm/memory/collectorPolicy.hpp	Thu Feb 07 12:23:51 2013 -0800
+++ b/hotspot/src/share/vm/memory/collectorPolicy.hpp	Thu Feb 07 22:04:17 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -321,7 +321,7 @@
 
  // Returns true if gen0 sizes were adjusted
   bool adjust_gen0_sizes(size_t* gen0_size_ptr, size_t* gen1_size_ptr,
-                               size_t heap_size, size_t min_gen1_size);
+                         const size_t heap_size, const size_t min_gen1_size);
 };
 
 class MarkSweepPolicy : public TwoGenerationCollectorPolicy {
--- a/hotspot/src/share/vm/memory/metaspace.cpp	Thu Feb 07 12:23:51 2013 -0800
+++ b/hotspot/src/share/vm/memory/metaspace.cpp	Thu Feb 07 22:04:17 2013 +0100
@@ -1737,10 +1737,10 @@
     *class_chunk_word_size = ClassSmallChunk;
     break;
   }
-  assert(chunk_word_size != 0 && class_chunk_word_size != 0,
+  assert(*chunk_word_size != 0 && *class_chunk_word_size != 0,
     err_msg("Initial chunks sizes bad: data  " SIZE_FORMAT
             " class " SIZE_FORMAT,
-            chunk_word_size, class_chunk_word_size));
+            *chunk_word_size, *class_chunk_word_size));
 }
 
 size_t SpaceManager::sum_free_in_chunks_in_use() const {
@@ -2040,7 +2040,7 @@
            align_size_up(humongous_chunks->word_size(),
                              HumongousChunkGranularity),
            err_msg("Humongous chunk size is wrong: word size " SIZE_FORMAT
-                   " granularity " SIZE_FORMAT,
+                   " granularity %d",
                    humongous_chunks->word_size(), HumongousChunkGranularity));
     Metachunk* next_humongous_chunks = humongous_chunks->next();
     chunk_manager->humongous_dictionary()->return_chunk(humongous_chunks);
@@ -2264,7 +2264,8 @@
   }
   MutexLockerEx cl(lock(), Mutex::_no_safepoint_check_flag);
   assert(allocation_total() == sum_used_in_chunks_in_use(),
-    err_msg("allocation total is not consistent %d vs %d",
+    err_msg("allocation total is not consistent " SIZE_FORMAT
+            " vs " SIZE_FORMAT,
             allocation_total(), sum_used_in_chunks_in_use()));
 }
 
@@ -2578,7 +2579,8 @@
 // argument passed in is at the top of the compressed space
 void Metaspace::initialize_class_space(ReservedSpace rs) {
   // The reserved space size may be bigger because of alignment, esp with UseLargePages
-  assert(rs.size() >= ClassMetaspaceSize, err_msg("%d != %d", rs.size(), ClassMetaspaceSize));
+  assert(rs.size() >= ClassMetaspaceSize,
+         err_msg(SIZE_FORMAT " != " UINTX_FORMAT, rs.size(), ClassMetaspaceSize));
   _class_space_list = new VirtualSpaceList(rs);
 }
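Two distinct bugs are fixed across these metaspace hunks: format specifiers that did not match their argument types (a size_t printed with %d and, in the granularity case, an int printed with SIZE_FORMAT), and an assert in the first hunk that tested the out-parameter pointers rather than the values they point to, which any valid pointer passes. The Java sketch below models the latter bug, with single-element arrays standing in for out-parameters and illustrative values; run it with -ea to see the value check fire.

```java
// Models the chunk-size assert fix: check the dereferenced values, not the pointers.
public class OutParamCheckSketch {
    static void chunkSizes(long[] chunkWordSize, long[] classChunkWordSize) {
        chunkWordSize[0] = 128;     // illustrative "data" chunk size
        classChunkWordSize[0] = 0;  // deliberately left unset to trip the check

        // Wrong check (mirrors the old bug): tests the references, always passes here.
        assert chunkWordSize != null && classChunkWordSize != null;

        // Right check (mirrors the fix): tests the dereferenced values.
        assert chunkWordSize[0] != 0 && classChunkWordSize[0] != 0
            : "Initial chunk sizes bad: data " + chunkWordSize[0]
              + " class " + classChunkWordSize[0];
    }

    public static void main(String[] args) {
        try {
            chunkSizes(new long[1], new long[1]);
        } catch (AssertionError expected) {
            System.out.println("value check fired: " + expected.getMessage());
        }
    }
}
```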
 
--- a/hotspot/src/share/vm/runtime/globals.hpp	Thu Feb 07 12:23:51 2013 -0800
+++ b/hotspot/src/share/vm/runtime/globals.hpp	Thu Feb 07 22:04:17 2013 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1816,7 +1816,7 @@
                                                                             \
   product(uintx, InitiatingHeapOccupancyPercent, 45,                        \
           "Percentage of the (entire) heap occupancy to start a "           \
-          "concurrent GC cycle. It us used by GCs that trigger a "          \
+          "concurrent GC cycle. It is used by GCs that trigger a "          \
           "concurrent GC cycle based on the occupancy of the entire heap, " \
           "not just one of the generations (e.g., G1). A value of 0 "       \
           "denotes 'do constant GC cycles'.")                               \
--- a/hotspot/src/share/vm/runtime/thread.cpp	Thu Feb 07 12:23:51 2013 -0800
+++ b/hotspot/src/share/vm/runtime/thread.cpp	Thu Feb 07 22:04:17 2013 +0100
@@ -1500,7 +1500,7 @@
   } else {
     _jni_attach_state = _not_attaching_via_jni;
   }
-  assert(_deferred_card_mark.is_empty(), "Default MemRegion ctor");
+  assert(deferred_card_mark().is_empty(), "Default MemRegion ctor");
   _safepoint_visible = false;
 }
 
@@ -1896,9 +1896,16 @@
     JvmtiExport::cleanup_thread(this);
   }
 
+  // We must flush any deferred card marks before removing a thread from
+  // the list of active threads.
+  Universe::heap()->flush_deferred_store_barrier(this);
+  assert(deferred_card_mark().is_empty(), "Should have been flushed");
+
 #ifndef SERIALGC
-  // We must flush G1-related buffers before removing a thread from
-  // the list of active threads.
+  // We must flush the G1-related buffers before removing a thread
+  // from the list of active threads. We must do this after any deferred
+  // card marks have been flushed (above) so that any entries that are
+  // added to the thread's dirty card queue as a result are not lost.
   if (UseG1GC) {
     flush_barrier_queues();
   }
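The hunk's comment states the ordering constraint; the underlying reason is that flushing the deferred card mark can itself append an entry to the thread's dirty card queue, so the G1 queue flush must run second or that entry is lost. A minimal Java model of the dependency, with illustrative names:

```java
import java.util.ArrayDeque;
import java.util.Deque;

// Models why flush_deferred_store_barrier() must precede flush_barrier_queues().
public class FlushOrderSketch {
    static Deque<String> dirtyCardQueue = new ArrayDeque<>();
    static String deferredCardMark = "card@0x1000";

    static void flushDeferredStoreBarrier() {
        if (deferredCardMark != null) {
            dirtyCardQueue.push(deferredCardMark);  // flushing enqueues a card
            deferredCardMark = null;
        }
    }

    static void flushBarrierQueues() {
        while (!dirtyCardQueue.isEmpty()) {
            System.out.println("processed " + dirtyCardQueue.pop());
        }
    }

    public static void main(String[] args) {
        flushDeferredStoreBarrier();  // must come first
        flushBarrierQueues();         // reversing the calls would drop card@0x1000
    }
}
```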
--- a/hotspot/src/share/vm/runtime/vmStructs.cpp	Thu Feb 07 12:23:51 2013 -0800
+++ b/hotspot/src/share/vm/runtime/vmStructs.cpp	Thu Feb 07 22:04:17 2013 +0100
@@ -1161,6 +1161,7 @@
   static_field(Abstract_VM_Version,            _vm_major_version,                             int)                                   \
   static_field(Abstract_VM_Version,            _vm_minor_version,                             int)                                   \
   static_field(Abstract_VM_Version,            _vm_build_number,                              int)                                   \
+  static_field(Abstract_VM_Version,            _reserve_for_allocation_prefetch,              int)                                   \
                                                                                                                                      \
   static_field(JDK_Version,                    _current,                                      JDK_Version)                           \
   nonstatic_field(JDK_Version,                 _partially_initialized,                        bool)                                  \