Merge
authorfparain
Fri, 03 Feb 2012 14:04:59 -0500
changeset 11630 65cad763ad87
parent 11629 72a73185bdc7 (current diff)
parent 11623 895059543b5b (diff)
child 11639 ff8cfc20d5cb
Merge
hotspot/src/share/vm/runtime/globals.hpp
--- a/.hgtags	Wed Feb 01 15:01:08 2012 -0500
+++ b/.hgtags	Fri Feb 03 14:04:59 2012 -0500
@@ -144,3 +144,4 @@
 6561530ea757c3f3a6fb171c9cc7b3885cdeca85 jdk8-b20
 b3a426170188f52981cf4573a2f14d487fddab0d jdk8-b21
 e8f03541af27e38aafb619b96863e17f65ffe53b jdk8-b22
+498124337041ad53cbaa7eb110f3d7acd6d4eac4 jdk8-b23
--- a/.hgtags-top-repo	Wed Feb 01 15:01:08 2012 -0500
+++ b/.hgtags-top-repo	Fri Feb 03 14:04:59 2012 -0500
@@ -144,3 +144,4 @@
 5a5eaf6374bcbe23530899579fed17a05b7705f3 jdk8-b20
 cc771d92284f71765eca14d6d08703c4af254c04 jdk8-b21
 7ad075c809952e355d25030605da6af30456ed74 jdk8-b22
+60d6f64a86b1e511169d264727f6d51415978df0 jdk8-b23
--- a/corba/.hgtags	Wed Feb 01 15:01:08 2012 -0500
+++ b/corba/.hgtags	Fri Feb 03 14:04:59 2012 -0500
@@ -144,3 +144,4 @@
 51d8b6cb18c0978ecfa4f33e1537d35ee01b69fa jdk8-b20
 f157fc2a71a38ce44007a6f18d5b011824dce705 jdk8-b21
 a11d0062c445d5f36651c78650ab88aa594bcbff jdk8-b22
+5218eb256658442b62b05295aafa5b5f35252972 jdk8-b23
--- a/hotspot/.hgtags	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/.hgtags	Fri Feb 03 14:04:59 2012 -0500
@@ -213,3 +213,5 @@
 513351373923f74a7c91755748b95c9771e59f96 hs23-b10
 24727fb37561779077fdfa5a33342246f20e5c0f jdk8-b22
 dcc292399a39113957eebbd3e487b7e05e2c79fc hs23-b11
+e850d8e7ea54b91c7aa656e297f0f9f38dd4c296 jdk8-b23
+9e177d44b10fe92ecffa965fef9c5ac5433c1b46 hs23-b12
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/G1CollectedHeap.java	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/G1CollectedHeap.java	Fri Feb 03 14:04:59 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -49,8 +49,12 @@
     static private long g1CommittedFieldOffset;
     // size_t _summary_bytes_used;
     static private CIntegerField summaryBytesUsedField;
-    // G1MonitoringSupport* _g1mm
+    // G1MonitoringSupport* _g1mm;
     static private AddressField g1mmField;
+    // MasterOldRegionSet _old_set;
+    static private long oldSetFieldOffset;
+    // MasterHumongousRegionSet _humongous_set;
+    static private long humongousSetFieldOffset;
 
     static {
         VM.registerVMInitializedObserver(new Observer() {
@@ -67,12 +71,14 @@
         g1CommittedFieldOffset = type.getField("_g1_committed").getOffset();
         summaryBytesUsedField = type.getCIntegerField("_summary_bytes_used");
         g1mmField = type.getAddressField("_g1mm");
+        oldSetFieldOffset = type.getField("_old_set").getOffset();
+        humongousSetFieldOffset = type.getField("_humongous_set").getOffset();
     }
 
     public long capacity() {
         Address g1CommittedAddr = addr.addOffsetTo(g1CommittedFieldOffset);
-        MemRegion g1_committed = new MemRegion(g1CommittedAddr);
-        return g1_committed.byteSize();
+        MemRegion g1Committed = new MemRegion(g1CommittedAddr);
+        return g1Committed.byteSize();
     }
 
     public long used() {
@@ -94,6 +100,18 @@
         return (G1MonitoringSupport) VMObjectFactory.newObject(G1MonitoringSupport.class, g1mmAddr);
     }
 
+    public HeapRegionSetBase oldSet() {
+        Address oldSetAddr = addr.addOffsetTo(oldSetFieldOffset);
+        return (HeapRegionSetBase) VMObjectFactory.newObject(HeapRegionSetBase.class,
+                                                             oldSetAddr);
+    }
+
+    public HeapRegionSetBase humongousSet() {
+        Address humongousSetAddr = addr.addOffsetTo(humongousSetFieldOffset);
+        return (HeapRegionSetBase) VMObjectFactory.newObject(HeapRegionSetBase.class,
+                                                             humongousSetAddr);
+    }
+
     private Iterator<HeapRegion> heapRegionIterator() {
         return hrs().heapRegionIterator();
     }
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/G1MonitoringSupport.java	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/G1MonitoringSupport.java	Fri Feb 03 14:04:59 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -77,6 +77,10 @@
         return edenUsedField.getValue(addr);
     }
 
+    public long edenRegionNum() {
+        return edenUsed() / HeapRegion.grainBytes();
+    }
+
     public long survivorCommitted() {
         return survivorCommittedField.getValue(addr);
     }
@@ -85,6 +89,10 @@
         return survivorUsedField.getValue(addr);
     }
 
+    public long survivorRegionNum() {
+        return survivorUsed() / HeapRegion.grainBytes();
+    }
+
     public long oldCommitted() {
         return oldCommittedField.getValue(addr);
     }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/HeapRegionSetBase.java	Fri Feb 03 14:04:59 2012 -0500
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.gc_implementation.g1;
+
+import java.util.Iterator;
+import java.util.Observable;
+import java.util.Observer;
+
+import sun.jvm.hotspot.debugger.Address;
+import sun.jvm.hotspot.runtime.VM;
+import sun.jvm.hotspot.runtime.VMObject;
+import sun.jvm.hotspot.runtime.VMObjectFactory;
+import sun.jvm.hotspot.types.AddressField;
+import sun.jvm.hotspot.types.CIntegerField;
+import sun.jvm.hotspot.types.Type;
+import sun.jvm.hotspot.types.TypeDataBase;
+
+// Mirror class for HeapRegionSetBase. Represents a group of regions.
+
+public class HeapRegionSetBase extends VMObject {
+    // size_t _length;
+    static private CIntegerField lengthField;
+    // size_t _region_num;
+    static private CIntegerField regionNumField;
+    // size_t _total_used_bytes;
+    static private CIntegerField totalUsedBytesField;
+
+    static {
+        VM.registerVMInitializedObserver(new Observer() {
+                public void update(Observable o, Object data) {
+                    initialize(VM.getVM().getTypeDataBase());
+                }
+            });
+    }
+
+    static private synchronized void initialize(TypeDataBase db) {
+        Type type = db.lookupType("HeapRegionSetBase");
+
+        lengthField         = type.getCIntegerField("_length");
+        regionNumField      = type.getCIntegerField("_region_num");
+        totalUsedBytesField = type.getCIntegerField("_total_used_bytes");
+    }
+
+    public long length() {
+        return lengthField.getValue(addr);
+    }
+
+    public long regionNum() {
+        return regionNumField.getValue(addr);
+    }
+
+    public long totalUsedBytes() {
+        return totalUsedBytesField.getValue(addr);
+    }
+
+    public HeapRegionSetBase(Address addr) {
+        super(addr);
+    }
+}
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/tools/HeapSummary.java	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/tools/HeapSummary.java	Fri Feb 03 14:04:59 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -67,6 +67,7 @@
       printValue("SurvivorRatio    = ", getFlagValue("SurvivorRatio", flagMap));
       printValMB("PermSize         = ", getFlagValue("PermSize", flagMap));
       printValMB("MaxPermSize      = ", getFlagValue("MaxPermSize", flagMap));
+      printValMB("G1HeapRegionSize = ", HeapRegion.grainBytes());
 
       System.out.println();
       System.out.println("Heap Usage:");
@@ -100,11 +101,20 @@
          } else if (sharedHeap instanceof G1CollectedHeap) {
              G1CollectedHeap g1h = (G1CollectedHeap) sharedHeap;
              G1MonitoringSupport g1mm = g1h.g1mm();
-             System.out.println("G1 Young Generation");
-             printG1Space("Eden Space:", g1mm.edenUsed(), g1mm.edenCommitted());
-             printG1Space("From Space:", g1mm.survivorUsed(), g1mm.survivorCommitted());
-             printG1Space("To Space:", 0, 0);
-             printG1Space("G1 Old Generation", g1mm.oldUsed(), g1mm.oldCommitted());
+             long edenRegionNum = g1mm.edenRegionNum();
+             long survivorRegionNum = g1mm.survivorRegionNum();
+             HeapRegionSetBase oldSet = g1h.oldSet();
+             HeapRegionSetBase humongousSet = g1h.humongousSet();
+             long oldRegionNum = oldSet.regionNum() + humongousSet.regionNum();
+             printG1Space("G1 Heap:", g1h.n_regions(),
+                          g1h.used(), g1h.capacity());
+             System.out.println("G1 Young Generation:");
+             printG1Space("Eden Space:", edenRegionNum,
+                          g1mm.edenUsed(), g1mm.edenCommitted());
+             printG1Space("Survivor Space:", survivorRegionNum,
+                          g1mm.survivorUsed(), g1mm.survivorCommitted());
+             printG1Space("G1 Old Generation:", oldRegionNum,
+                          g1mm.oldUsed(), g1mm.oldCommitted());
          } else {
              throw new RuntimeException("unknown SharedHeap type : " + heap.getClass());
          }
@@ -216,9 +226,11 @@
       System.out.println(alignment +  (double)space.used() * 100.0 / space.capacity() + "% used");
    }
 
-   private void printG1Space(String spaceName, long used, long capacity) {
+   private void printG1Space(String spaceName, long regionNum,
+                             long used, long capacity) {
       long free = capacity - used;
       System.out.println(spaceName);
+      printValue("regions  = ", regionNum);
       printValMB("capacity = ", capacity);
       printValMB("used     = ", used);
       printValMB("free     = ", free);
--- a/hotspot/make/hotspot_version	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/make/hotspot_version	Fri Feb 03 14:04:59 2012 -0500
@@ -35,7 +35,7 @@
 
 HS_MAJOR_VER=23
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=12
+HS_BUILD_NUMBER=13
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=8
--- a/hotspot/src/cpu/sparc/vm/frame_sparc.cpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/cpu/sparc/vm/frame_sparc.cpp	Fri Feb 03 14:04:59 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -810,7 +810,7 @@
 }
 
 
-#ifdef ASSERT
+#ifndef PRODUCT
 
 #define DESCRIBE_FP_OFFSET(name) \
   values.describe(frame_no, fp() + frame::name##_offset, #name)
@@ -820,11 +820,19 @@
     values.describe(frame_no, sp() + w, err_msg("register save area word %d", w), 1);
   }
 
-  if (is_interpreted_frame()) {
+  if (is_ricochet_frame()) {
+    MethodHandles::RicochetFrame::describe(this, values, frame_no);
+  } else if (is_interpreted_frame()) {
     DESCRIBE_FP_OFFSET(interpreter_frame_d_scratch_fp);
     DESCRIBE_FP_OFFSET(interpreter_frame_l_scratch_fp);
     DESCRIBE_FP_OFFSET(interpreter_frame_padding);
     DESCRIBE_FP_OFFSET(interpreter_frame_oop_temp);
+
+    // esp, according to Lesp (e.g. not depending on bci), if seems valid
+    intptr_t* esp = *interpreter_frame_esp_addr();
+    if ((esp >= sp()) && (esp < fp())) {
+      values.describe(-1, esp, "*Lesp");
+    }
   }
 
   if (!is_compiled_frame()) {
@@ -844,4 +852,3 @@
   // unused... but returns fp() to minimize changes introduced by 7087445
   return fp();
 }
-
--- a/hotspot/src/cpu/sparc/vm/methodHandles_sparc.cpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/cpu/sparc/vm/methodHandles_sparc.cpp	Fri Feb 03 14:04:59 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -177,7 +177,7 @@
   BLOCK_COMMENT("ricochet_blob.bounce");
 
   if (VerifyMethodHandles)  RicochetFrame::verify_clean(_masm);
-  trace_method_handle(_masm, "ricochet_blob.bounce");
+  trace_method_handle(_masm, "return/ricochet_blob.bounce");
 
   __ JMP(L1_continuation, 0);
   __ delayed()->nop();
@@ -268,14 +268,16 @@
 }
 
 // Emit code to verify that FP is pointing at a valid ricochet frame.
-#ifdef ASSERT
+#ifndef PRODUCT
 enum {
   ARG_LIMIT = 255, SLOP = 45,
   // use this parameter for checking for garbage stack movements:
   UNREASONABLE_STACK_MOVE = (ARG_LIMIT + SLOP)
   // the slop defends against false alarms due to fencepost errors
 };
+#endif
 
+#ifdef ASSERT
 void MethodHandles::RicochetFrame::verify_clean(MacroAssembler* _masm) {
   // The stack should look like this:
   //    ... keep1 | dest=42 | keep2 | magic | handler | magic | recursive args | [RF]
@@ -1001,31 +1003,142 @@
 }
 
 #ifndef PRODUCT
+void MethodHandles::RicochetFrame::describe(const frame* fr, FrameValues& values, int frame_no)  {
+    RicochetFrame* rf = new RicochetFrame(*fr);
+
+    // ricochet slots (kept in registers for sparc)
+    values.describe(frame_no, rf->register_addr(I5_savedSP), err_msg("exact_sender_sp reg for #%d", frame_no));
+    values.describe(frame_no, rf->register_addr(L5_conversion), err_msg("conversion reg for #%d", frame_no));
+    values.describe(frame_no, rf->register_addr(L4_saved_args_base), err_msg("saved_args_base reg for #%d", frame_no));
+    values.describe(frame_no, rf->register_addr(L3_saved_args_layout), err_msg("saved_args_layout reg for #%d", frame_no));
+    values.describe(frame_no, rf->register_addr(L2_saved_target), err_msg("saved_target reg for #%d", frame_no));
+    values.describe(frame_no, rf->register_addr(L1_continuation), err_msg("continuation reg for #%d", frame_no));
+
+    // relevant ricochet targets (in caller frame)
+    values.describe(-1, rf->saved_args_base(),  err_msg("*saved_args_base for #%d", frame_no));
+    values.describe(-1, (intptr_t *)(STACK_BIAS+(uintptr_t)rf->exact_sender_sp()),  err_msg("*exact_sender_sp+STACK_BIAS for #%d", frame_no));
+}
+#endif // ASSERT
+
+#ifndef PRODUCT
 extern "C" void print_method_handle(oop mh);
 void trace_method_handle_stub(const char* adaptername,
                               oopDesc* mh,
-                              intptr_t* saved_sp) {
+                              intptr_t* saved_sp,
+                              intptr_t* args,
+                              intptr_t* tracing_fp) {
   bool has_mh = (strstr(adaptername, "return/") == NULL);  // return adapters don't have mh
-  tty->print_cr("MH %s mh="INTPTR_FORMAT " saved_sp=" INTPTR_FORMAT, adaptername, (intptr_t) mh, saved_sp);
-  if (has_mh)
+
+  tty->print_cr("MH %s mh="INTPTR_FORMAT " saved_sp=" INTPTR_FORMAT " args=" INTPTR_FORMAT, adaptername, (intptr_t) mh, saved_sp, args);
+
+  if (Verbose) {
+    // dumping last frame with frame::describe
+
+    JavaThread* p = JavaThread::active();
+
+    ResourceMark rm;
+    PRESERVE_EXCEPTION_MARK; // may not be needed by safer and unexpensive here
+    FrameValues values;
+
+    // Note: We want to allow trace_method_handle from any call site.
+    // While trace_method_handle creates a frame, it may be entered
+    // without a valid return PC in O7 (e.g. not just after a call).
+    // Walking that frame could lead to failures due to that invalid PC.
+    // => carefully detect that frame when doing the stack walking
+
+    // walk up to the right frame using the "tracing_fp" argument
+    intptr_t* cur_sp = StubRoutines::Sparc::flush_callers_register_windows_func()();
+    frame cur_frame(cur_sp, frame::unpatchable, NULL);
+
+    while (cur_frame.fp() != (intptr_t *)(STACK_BIAS+(uintptr_t)tracing_fp)) {
+      cur_frame = os::get_sender_for_C_frame(&cur_frame);
+    }
+
+    // safely create a frame and call frame::describe
+    intptr_t *dump_sp = cur_frame.sender_sp();
+    intptr_t *dump_fp = cur_frame.link();
+
+    bool walkable = has_mh; // whether the traced frame shoud be walkable
+
+    // the sender for cur_frame is the caller of trace_method_handle
+    if (walkable) {
+      // The previous definition of walkable may have to be refined
+      // if new call sites cause the next frame constructor to start
+      // failing. Alternatively, frame constructors could be
+      // modified to support the current or future non walkable
+      // frames (but this is more intrusive and is not considered as
+      // part of this RFE, which will instead use a simpler output).
+      frame dump_frame = frame(dump_sp,
+                               cur_frame.sp(), // younger_sp
+                               false); // no adaptation
+      dump_frame.describe(values, 1);
+    } else {
+      // Robust dump for frames which cannot be constructed from sp/younger_sp
+      // Add descriptions without building a Java frame to avoid issues
+      values.describe(-1, dump_fp, "fp for #1 <not parsed, cannot trust pc>");
+      values.describe(-1, dump_sp, "sp");
+    }
+
+    bool has_args = has_mh; // whether Gargs is meaningful
+
+    // mark args, if seems valid (may not be valid for some adapters)
+    if (has_args) {
+      if ((args >= dump_sp) && (args < dump_fp)) {
+        values.describe(-1, args, "*G4_args");
+      }
+    }
+
+    // mark saved_sp, if seems valid (may not be valid for some adapters)
+    intptr_t *unbiased_sp = (intptr_t *)(STACK_BIAS+(uintptr_t)saved_sp);
+    if ((unbiased_sp >= dump_sp - UNREASONABLE_STACK_MOVE) && (unbiased_sp < dump_fp)) {
+      values.describe(-1, unbiased_sp, "*saved_sp+STACK_BIAS");
+    }
+
+    // Note: the unextended_sp may not be correct
+    tty->print_cr("  stack layout:");
+    values.print(p);
+  }
+
+  if (has_mh) {
     print_method_handle(mh);
+  }
 }
+
 void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) {
   if (!TraceMethodHandles)  return;
   BLOCK_COMMENT("trace_method_handle {");
   // save: Gargs, O5_savedSP
-  __ save_frame(16);
+  __ save_frame(16); // need space for saving required FPU state
+
   __ set((intptr_t) adaptername, O0);
   __ mov(G3_method_handle, O1);
   __ mov(I5_savedSP, O2);
+  __ mov(Gargs, O3);
+  __ mov(I6, O4); // frame identifier for safe stack walking
+
+  // Save scratched registers that might be needed. Robustness is more
+  // important than optimizing the saves for this debug only code.
+
+  // save FP result, valid at some call sites (adapter_opt_return_float, ...)
+  Address d_save(FP, -sizeof(jdouble) + STACK_BIAS);
+  __ stf(FloatRegisterImpl::D, Ftos_d, d_save);
+  // Safely save all globals but G2 (handled by call_VM_leaf) and G7
+  // (OS reserved).
   __ mov(G3_method_handle, L3);
   __ mov(Gargs, L4);
   __ mov(G5_method_type, L5);
-  __ call_VM_leaf(L7, CAST_FROM_FN_PTR(address, trace_method_handle_stub));
+  __ mov(G6, L6);
+  __ mov(G1, L1);
+
+  __ call_VM_leaf(L2 /* for G2 */, CAST_FROM_FN_PTR(address, trace_method_handle_stub));
 
   __ mov(L3, G3_method_handle);
   __ mov(L4, Gargs);
   __ mov(L5, G5_method_type);
+  __ mov(L6, G6);
+  __ mov(L1, G1);
+  __ ldf(FloatRegisterImpl::D, d_save, Ftos_d);
+
   __ restore();
   BLOCK_COMMENT("} trace_method_handle");
 }
@@ -1250,7 +1363,7 @@
         move_typed_arg(_masm, arg_type, false,
                        prim_value_addr,
                        Address(O0_argslot, 0),
-                       O2_scratch);  // must be an even register for !_LP64 long moves (uses O2/O3)
+                      O2_scratch);  // must be an even register for !_LP64 long moves (uses O2/O3)
       }
 
       if (direct_to_method) {
--- a/hotspot/src/cpu/sparc/vm/methodHandles_sparc.hpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/cpu/sparc/vm/methodHandles_sparc.hpp	Fri Feb 03 14:04:59 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -145,6 +145,8 @@
   }
 
   static void verify_clean(MacroAssembler* _masm) NOT_DEBUG_RETURN;
+
+  static void describe(const frame* fr, FrameValues& values, int frame_no) PRODUCT_RETURN;
 };
 
 // Additional helper methods for MethodHandles code generation:
--- a/hotspot/src/cpu/x86/vm/frame_x86.cpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/cpu/x86/vm/frame_x86.cpp	Fri Feb 03 14:04:59 2012 -0500
@@ -651,13 +651,15 @@
   return &interpreter_frame_tos_address()[index];
 }
 
-#ifdef ASSERT
+#ifndef PRODUCT
 
 #define DESCRIBE_FP_OFFSET(name) \
   values.describe(frame_no, fp() + frame::name##_offset, #name)
 
 void frame::describe_pd(FrameValues& values, int frame_no) {
-  if (is_interpreted_frame()) {
+  if (is_ricochet_frame()) {
+    MethodHandles::RicochetFrame::describe(this, values, frame_no);
+  } else if (is_interpreted_frame()) {
     DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp);
     DESCRIBE_FP_OFFSET(interpreter_frame_last_sp);
     DESCRIBE_FP_OFFSET(interpreter_frame_method);
@@ -667,7 +669,6 @@
     DESCRIBE_FP_OFFSET(interpreter_frame_bcx);
     DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp);
   }
-
 }
 #endif
 
--- a/hotspot/src/cpu/x86/vm/methodHandles_x86.cpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/cpu/x86/vm/methodHandles_x86.cpp	Fri Feb 03 14:04:59 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -279,14 +279,16 @@
 }
 
 // Emit code to verify that RBP is pointing at a valid ricochet frame.
-#ifdef ASSERT
+#ifndef PRODUCT
 enum {
   ARG_LIMIT = 255, SLOP = 4,
   // use this parameter for checking for garbage stack movements:
   UNREASONABLE_STACK_MOVE = (ARG_LIMIT + SLOP)
   // the slop defends against false alarms due to fencepost errors
 };
+#endif
 
+#ifdef ASSERT
 void MethodHandles::RicochetFrame::verify_clean(MacroAssembler* _masm) {
   // The stack should look like this:
   //    ... keep1 | dest=42 | keep2 | RF | magic | handler | magic | recursive args |
@@ -990,6 +992,26 @@
   BLOCK_COMMENT("} move_return_value");
 }
 
+#ifndef PRODUCT
+#define DESCRIBE_RICOCHET_OFFSET(rf, name) \
+  values.describe(frame_no, (intptr_t *) (((uintptr_t)rf) + MethodHandles::RicochetFrame::name##_offset_in_bytes()), #name)
+
+void MethodHandles::RicochetFrame::describe(const frame* fr, FrameValues& values, int frame_no)  {
+    address bp = (address) fr->fp();
+    RicochetFrame* rf = (RicochetFrame*)(bp - sender_link_offset_in_bytes());
+
+    // ricochet slots
+    DESCRIBE_RICOCHET_OFFSET(rf, exact_sender_sp);
+    DESCRIBE_RICOCHET_OFFSET(rf, conversion);
+    DESCRIBE_RICOCHET_OFFSET(rf, saved_args_base);
+    DESCRIBE_RICOCHET_OFFSET(rf, saved_args_layout);
+    DESCRIBE_RICOCHET_OFFSET(rf, saved_target);
+    DESCRIBE_RICOCHET_OFFSET(rf, continuation);
+
+    // relevant ricochet targets (in caller frame)
+    values.describe(-1, rf->saved_args_base(),  err_msg("*saved_args_base for #%d", frame_no));
+}
+#endif // ASSERT
 
 #ifndef PRODUCT
 extern "C" void print_method_handle(oop mh);
@@ -1001,6 +1023,7 @@
                               intptr_t* saved_bp) {
   // called as a leaf from native code: do not block the JVM!
   bool has_mh = (strstr(adaptername, "return/") == NULL);  // return adapters don't have rcx_mh
+
   intptr_t* last_sp = (intptr_t*) saved_bp[frame::interpreter_frame_last_sp_offset];
   intptr_t* base_sp = last_sp;
   typedef MethodHandles::RicochetFrame RicochetFrame;
@@ -1030,13 +1053,64 @@
     tty->cr();
     if (last_sp != saved_sp && last_sp != NULL)
       tty->print_cr("*** last_sp="PTR_FORMAT, (intptr_t)last_sp);
-    int stack_dump_count = 16;
-    if (stack_dump_count < (int)(saved_bp + 2 - saved_sp))
-      stack_dump_count = (int)(saved_bp + 2 - saved_sp);
-    if (stack_dump_count > 64)  stack_dump_count = 48;
-    for (i = 0; i < stack_dump_count; i += 4) {
-      tty->print_cr(" dump at SP[%d] "PTR_FORMAT": "PTR_FORMAT" "PTR_FORMAT" "PTR_FORMAT" "PTR_FORMAT,
-                    i, (intptr_t) &entry_sp[i+0], entry_sp[i+0], entry_sp[i+1], entry_sp[i+2], entry_sp[i+3]);
+
+    {
+     // dumping last frame with frame::describe
+
+      JavaThread* p = JavaThread::active();
+
+      ResourceMark rm;
+      PRESERVE_EXCEPTION_MARK; // may not be needed by safer and unexpensive here
+      FrameValues values;
+
+      // Note: We want to allow trace_method_handle from any call site.
+      // While trace_method_handle creates a frame, it may be entered
+      // without a PC on the stack top (e.g. not just after a call).
+      // Walking that frame could lead to failures due to that invalid PC.
+      // => carefully detect that frame when doing the stack walking
+
+      // Current C frame
+      frame cur_frame = os::current_frame();
+
+      // Robust search of trace_calling_frame (independant of inlining).
+      // Assumes saved_regs comes from a pusha in the trace_calling_frame.
+      assert(cur_frame.sp() < saved_regs, "registers not saved on stack ?");
+      frame trace_calling_frame = os::get_sender_for_C_frame(&cur_frame);
+      while (trace_calling_frame.fp() < saved_regs) {
+        trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame);
+      }
+
+      // safely create a frame and call frame::describe
+      intptr_t *dump_sp = trace_calling_frame.sender_sp();
+      intptr_t *dump_fp = trace_calling_frame.link();
+
+      bool walkable = has_mh; // whether the traced frame shoud be walkable
+
+      if (walkable) {
+        // The previous definition of walkable may have to be refined
+        // if new call sites cause the next frame constructor to start
+        // failing. Alternatively, frame constructors could be
+        // modified to support the current or future non walkable
+        // frames (but this is more intrusive and is not considered as
+        // part of this RFE, which will instead use a simpler output).
+        frame dump_frame = frame(dump_sp, dump_fp);
+        dump_frame.describe(values, 1);
+      } else {
+        // Stack may not be walkable (invalid PC above FP):
+        // Add descriptions without building a Java frame to avoid issues
+        values.describe(-1, dump_fp, "fp for #1 <not parsed, cannot trust pc>");
+        values.describe(-1, dump_sp, "sp for #1");
+      }
+
+      // mark saved_sp if seems valid
+      if (has_mh) {
+        if ((saved_sp >= dump_sp - UNREASONABLE_STACK_MOVE) && (saved_sp < dump_fp)) {
+          values.describe(-1, saved_sp, "*saved_sp");
+        }
+      }
+
+      tty->print_cr("  stack layout:");
+      values.print(p);
     }
     if (has_mh)
       print_method_handle(mh);
@@ -1066,26 +1140,49 @@
 void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) {
   if (!TraceMethodHandles)  return;
   BLOCK_COMMENT("trace_method_handle {");
-  __ push(rax);
-  __ lea(rax, Address(rsp, wordSize * NOT_LP64(6) LP64_ONLY(14))); // entry_sp  __ pusha();
+  __ enter();
+  __ andptr(rsp, -16); // align stack if needed for FPU state
   __ pusha();
-  __ mov(rbx, rsp);
-  __ enter();
+  __ mov(rbx, rsp); // for retreiving saved_regs
+  // Note: saved_regs must be in the entered frame for the
+  // robust stack walking implemented in trace_method_handle_stub.
+
+  // save FP result, valid at some call sites (adapter_opt_return_float, ...)
+  __ increment(rsp, -2 * wordSize);
+  if  (UseSSE >= 2) {
+    __ movdbl(Address(rsp, 0), xmm0);
+  } else if (UseSSE == 1) {
+    __ movflt(Address(rsp, 0), xmm0);
+  } else {
+    __ fst_d(Address(rsp, 0));
+  }
+
   // incoming state:
   // rcx: method handle
   // r13 or rsi: saved sp
   // To avoid calling convention issues, build a record on the stack and pass the pointer to that instead.
+  // Note: fix the increment below if pushing more arguments
   __ push(rbp);               // saved_bp
-  __ push(rsi);               // saved_sp
-  __ push(rax);               // entry_sp
+  __ push(saved_last_sp_register()); // saved_sp
+  __ push(rbp);               // entry_sp (with extra align space)
   __ push(rbx);               // pusha saved_regs
   __ push(rcx);               // mh
-  __ push(rcx);               // adaptername
+  __ push(rcx);               // slot for adaptername
   __ movptr(Address(rsp, 0), (intptr_t) adaptername);
   __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, trace_method_handle_stub_wrapper), rsp);
-  __ leave();
+  __ increment(rsp, 6 * wordSize); // MethodHandleStubArguments
+
+  if  (UseSSE >= 2) {
+    __ movdbl(xmm0, Address(rsp, 0));
+  } else if (UseSSE == 1) {
+    __ movflt(xmm0, Address(rsp, 0));
+  } else {
+    __ fld_d(Address(rsp, 0));
+  }
+  __ increment(rsp, 2 * wordSize);
+
   __ popa();
-  __ pop(rax);
+  __ leave();
   BLOCK_COMMENT("} trace_method_handle");
 }
 #endif //PRODUCT
--- a/hotspot/src/cpu/x86/vm/methodHandles_x86.hpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/cpu/x86/vm/methodHandles_x86.hpp	Fri Feb 03 14:04:59 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -224,6 +224,8 @@
   }
 
   static void verify_clean(MacroAssembler* _masm) NOT_DEBUG_RETURN;
+
+  static void describe(const frame* fr, FrameValues& values, int frame_no) PRODUCT_RETURN;
 };
 
 // Additional helper methods for MethodHandles code generation:
--- a/hotspot/src/cpu/zero/vm/frame_zero.cpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/cpu/zero/vm/frame_zero.cpp	Fri Feb 03 14:04:59 2012 -0500
@@ -418,7 +418,7 @@
   }
 }
 
-#ifdef ASSERT
+#ifndef PRODUCT
 
 void frame::describe_pd(FrameValues& values, int frame_no) {
 
--- a/hotspot/src/share/vm/c1/c1_LIR.hpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/c1/c1_LIR.hpp	Fri Feb 03 14:04:59 2012 -0500
@@ -1354,9 +1354,10 @@
   CodeStub*     _stub;   // if this is a branch to a stub, this is the stub
 
  public:
-  LIR_OpBranch(LIR_Condition cond, Label* lbl)
+  LIR_OpBranch(LIR_Condition cond, BasicType type, Label* lbl)
     : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL)
     , _cond(cond)
+    , _type(type)
     , _label(lbl)
     , _block(NULL)
     , _ublock(NULL)
@@ -2053,7 +2054,7 @@
   void jump(CodeStub* stub) {
     append(new LIR_OpBranch(lir_cond_always, T_ILLEGAL, stub));
   }
-  void branch(LIR_Condition cond, Label* lbl)        { append(new LIR_OpBranch(cond, lbl)); }
+  void branch(LIR_Condition cond, BasicType type, Label* lbl)        { append(new LIR_OpBranch(cond, type, lbl)); }
   void branch(LIR_Condition cond, BasicType type, BlockBegin* block) {
     assert(type != T_FLOAT && type != T_DOUBLE, "no fp comparisons");
     append(new LIR_OpBranch(cond, type, block));
--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp	Fri Feb 03 14:04:59 2012 -0500
@@ -2350,7 +2350,7 @@
     } else {
       LabelObj* L = new LabelObj();
       __ cmp(lir_cond_less, value, low_key);
-      __ branch(lir_cond_less, L->label());
+      __ branch(lir_cond_less, T_INT, L->label());
       __ cmp(lir_cond_lessEqual, value, high_key);
       __ branch(lir_cond_lessEqual, T_INT, dest);
       __ branch_destination(L->label());
--- a/hotspot/src/share/vm/c1/c1_Runtime1.cpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/c1/c1_Runtime1.cpp	Fri Feb 03 14:04:59 2012 -0500
@@ -413,8 +413,9 @@
     }
     bci = branch_bci + offset;
   }
-
+  assert(!HAS_PENDING_EXCEPTION, "Should not have any exceptions pending");
   osr_nm = CompilationPolicy::policy()->event(enclosing_method, method, branch_bci, bci, level, nm, THREAD);
+  assert(!HAS_PENDING_EXCEPTION, "Event handler should not throw any exceptions");
   return osr_nm;
 }
 
--- a/hotspot/src/share/vm/classfile/javaClasses.cpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/classfile/javaClasses.cpp	Fri Feb 03 14:04:59 2012 -0500
@@ -1347,7 +1347,13 @@
     return _backtrace();
   }
 
-  inline void push(methodOop method, short bci, TRAPS) {
+  inline void push(methodOop method, int bci, TRAPS) {
+    // Smear the -1 bci to 0 since the array only holds unsigned
+    // shorts.  The later line number lookup would just smear the -1
+    // to a 0 even if it could be recorded.
+    if (bci == SynchronizationEntryBCI) bci = 0;
+    assert(bci == (jushort)bci, "doesn't fit");
+
     if (_index >= trace_chunk_size) {
       methodHandle mhandle(THREAD, method);
       expand(CHECK);
@@ -1574,8 +1580,13 @@
   int chunk_count = 0;
 
   for (;!st.at_end(); st.next()) {
-    // add element
-    bcis->ushort_at_put(chunk_count, st.bci());
+    // Add entry and smear the -1 bci to 0 since the array only holds
+    // unsigned shorts.  The later line number lookup would just smear
+    // the -1 to a 0 even if it could be recorded.
+    int bci = st.bci();
+    if (bci == SynchronizationEntryBCI) bci = 0;
+    assert(bci == (jushort)bci, "doesn't fit");
+    bcis->ushort_at_put(chunk_count, bci);
     methods->obj_at_put(chunk_count, st.method());
 
     chunk_count++;
--- a/hotspot/src/share/vm/compiler/compileBroker.cpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/compiler/compileBroker.cpp	Fri Feb 03 14:04:59 2012 -0500
@@ -962,7 +962,7 @@
                                         methodHandle hot_method,
                                         int hot_count,
                                         const char* comment,
-                                        TRAPS) {
+                                        Thread* thread) {
   // do nothing if compiler thread(s) is not available
   if (!_initialized ) {
     return;
@@ -1038,7 +1038,7 @@
 
   // Acquire our lock.
   {
-    MutexLocker locker(queue->lock(), THREAD);
+    MutexLocker locker(queue->lock(), thread);
 
     // Make sure the method has not slipped into the queues since
     // last we checked; note that those checks were "fast bail-outs".
@@ -1120,7 +1120,7 @@
 nmethod* CompileBroker::compile_method(methodHandle method, int osr_bci,
                                        int comp_level,
                                        methodHandle hot_method, int hot_count,
-                                       const char* comment, TRAPS) {
+                                       const char* comment, Thread* THREAD) {
   // make sure arguments make sense
   assert(method->method_holder()->klass_part()->oop_is_instance(), "not an instance method");
   assert(osr_bci == InvocationEntryBci || (0 <= osr_bci && osr_bci < method->code_size()), "bci out of range");
@@ -1174,10 +1174,10 @@
   assert(!HAS_PENDING_EXCEPTION, "No exception should be present");
   // some prerequisites that are compiler specific
   if (compiler(comp_level)->is_c2() || compiler(comp_level)->is_shark()) {
-    method->constants()->resolve_string_constants(CHECK_0);
+    method->constants()->resolve_string_constants(CHECK_AND_CLEAR_NULL);
     // Resolve all classes seen in the signature of the method
     // we are compiling.
-    methodOopDesc::load_signature_classes(method, CHECK_0);
+    methodOopDesc::load_signature_classes(method, CHECK_AND_CLEAR_NULL);
   }
 
   // If the method is native, do the lookup in the thread requesting
@@ -1231,7 +1231,7 @@
       return NULL;
     }
   } else {
-    compile_method_base(method, osr_bci, comp_level, hot_method, hot_count, comment, CHECK_0);
+    compile_method_base(method, osr_bci, comp_level, hot_method, hot_count, comment, THREAD);
   }
 
   // return requested nmethod
--- a/hotspot/src/share/vm/compiler/compileBroker.hpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/compiler/compileBroker.hpp	Fri Feb 03 14:04:59 2012 -0500
@@ -333,7 +333,7 @@
                                   methodHandle hot_method,
                                   int hot_count,
                                   const char* comment,
-                                  TRAPS);
+                                  Thread* thread);
   static CompileQueue* compile_queue(int comp_level) {
     if (is_c2_compile(comp_level)) return _c2_method_queue;
     if (is_c1_compile(comp_level)) return _c1_method_queue;
@@ -363,7 +363,7 @@
                                  int comp_level,
                                  methodHandle hot_method,
                                  int hot_count,
-                                 const char* comment, TRAPS);
+                                 const char* comment, Thread* thread);
 
   static void compiler_thread_loop();
 
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Fri Feb 03 14:04:59 2012 -0500
@@ -42,8 +42,7 @@
 #include "runtime/handles.inline.hpp"
 #include "runtime/java.hpp"
 
-//
-// CMS Bit Map Wrapper
+// Concurrent marking bit map wrapper
 
 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
   _bm((uintptr_t*)NULL,0),
@@ -53,13 +52,13 @@
   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
 
-  guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
+  guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map");
   // For now we'll just commit all of the bit map up fromt.
   // Later on we'll try to be more parsimonious with swap.
   guarantee(_virtual_space.initialize(brs, brs.size()),
-            "couldn't reseve backing store for CMS bit map");
+            "couldn't reseve backing store for concurrent marking bit map");
   assert(_virtual_space.committed_size() == brs.size(),
-         "didn't reserve backing store for all of CMS bit map?");
+         "didn't reserve backing store for all of concurrent marking bit map?");
   _bm.set_map((uintptr_t*)_virtual_space.low());
   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
          _bmWordSize, "inconsistency in bit map sizing");
@@ -104,17 +103,6 @@
   return (int) (diff >> _shifter);
 }
 
-bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
-  HeapWord* left  = MAX2(_bmStartWord, mr.start());
-  HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
-  if (right > left) {
-    // Right-open interval [leftOffset, rightOffset).
-    return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
-  } else {
-    return true;
-  }
-}
-
 void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
                                              size_t    from_start_index,
                                              HeapWord* to_start_word,
@@ -431,8 +419,6 @@
     assert(newOop->is_oop(), "Expected an oop");
     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
            "only grey objects on this stack");
-    // iterate over the oops in this oop, marking and pushing
-    // the ones in CMS generation.
     newOop->oop_iterate(cl);
     if (yield_after && _cm->do_yield_check()) {
       res = false;
@@ -474,6 +460,84 @@
               && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
 }
 
+CMRootRegions::CMRootRegions() :
+  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
+  _should_abort(false),  _next_survivor(NULL) { }
+
+void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
+  _young_list = g1h->young_list();
+  _cm = cm;
+}
+
+void CMRootRegions::prepare_for_scan() {
+  assert(!scan_in_progress(), "pre-condition");
+
+  // Currently, only survivors can be root regions.
+  assert(_next_survivor == NULL, "pre-condition");
+  _next_survivor = _young_list->first_survivor_region();
+  _scan_in_progress = (_next_survivor != NULL);
+  _should_abort = false;
+}
+
+HeapRegion* CMRootRegions::claim_next() {
+  if (_should_abort) {
+    // If someone has set the should_abort flag, we return NULL to
+    // force the caller to bail out of their loop.
+    return NULL;
+  }
+
+  // Currently, only survivors can be root regions.
+  HeapRegion* res = _next_survivor;
+  if (res != NULL) {
+    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
+    // Read it again in case it changed while we were waiting for the lock.
+    res = _next_survivor;
+    if (res != NULL) {
+      if (res == _young_list->last_survivor_region()) {
+        // We just claimed the last survivor so store NULL to indicate
+        // that we're done.
+        _next_survivor = NULL;
+      } else {
+        _next_survivor = res->get_next_young_region();
+      }
+    } else {
+      // Someone else claimed the last survivor while we were trying
+      // to take the lock so nothing else to do.
+    }
+  }
+  assert(res == NULL || res->is_survivor(), "post-condition");
+
+  return res;
+}
+
+void CMRootRegions::scan_finished() {
+  assert(scan_in_progress(), "pre-condition");
+
+  // Currently, only survivors can be root regions.
+  if (!_should_abort) {
+    assert(_next_survivor == NULL, "we should have claimed all survivors");
+  }
+  _next_survivor = NULL;
+
+  {
+    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
+    _scan_in_progress = false;
+    RootRegionScan_lock->notify_all();
+  }
+}
+
+bool CMRootRegions::wait_until_scan_finished() {
+  if (!scan_in_progress()) return false;
+
+  {
+    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
+    while (scan_in_progress()) {
+      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
+    }
+  }
+  return true;
+}
+
 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 #endif // _MSC_VER
@@ -498,6 +562,7 @@
   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),
+
   _prevMarkBitMap(&_markBitMap1),
   _nextMarkBitMap(&_markBitMap2),
   _at_least_one_mark_complete(false),
@@ -526,7 +591,11 @@
   _cleanup_times(),
   _total_counting_time(0.0),
   _total_rs_scrub_time(0.0),
-  _parallel_workers(NULL) {
+
+  _parallel_workers(NULL),
+
+  _count_card_bitmaps(NULL),
+  _count_marked_bytes(NULL) {
   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
   if (verbose_level < no_verbose) {
     verbose_level = no_verbose;
@@ -557,9 +626,16 @@
   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
   satb_qs.set_buffer_size(G1SATBBufferSize);
 
+  _root_regions.init(_g1h, this);
+
   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
 
+  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_task_num);
+  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num);
+
+  BitMap::idx_t card_bm_size = _card_bm.size();
+
   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
   _active_tasks = _max_task_num;
   for (int i = 0; i < (int) _max_task_num; ++i) {
@@ -567,10 +643,26 @@
     task_queue->initialize();
     _task_queues->register_queue(i, task_queue);
 
-    _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
+    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
+    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions);
+
+    _tasks[i] = new CMTask(i, this,
+                           _count_marked_bytes[i],
+                           &_count_card_bitmaps[i],
+                           task_queue, _task_queues);
+
     _accum_task_vtime[i] = 0.0;
   }
 
+  // Calculate the card number for the bottom of the heap. Used
+  // in biasing indexes into the accounting card bitmaps.
+  _heap_bottom_card_num =
+    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
+                                CardTableModRefBS::card_shift);
+
+  // Clear all the liveness counting data
+  clear_all_count_data();
+
   if (ConcGCThreads > ParallelGCThreads) {
     vm_exit_during_initialization("Can't have more ConcGCThreads "
                                   "than ParallelGCThreads.");
@@ -750,11 +842,6 @@
   ShouldNotReachHere();
 }
 
-// This closure is used to mark refs into the g1 generation
-// from external roots in the CMS bit map.
-// Called at the first checkpoint.
-//
-
 void ConcurrentMark::clearNextBitmap() {
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   G1CollectorPolicy* g1p = g1h->g1_policy();
@@ -794,6 +881,9 @@
     assert(!g1h->mark_in_progress(), "invariant");
   }
 
+  // Clear the liveness counting data
+  clear_all_count_data();
+
   // Repeat the asserts from above.
   guarantee(cmThread()->during_cycle(), "invariant");
   guarantee(!g1h->mark_in_progress(), "invariant");
@@ -854,6 +944,8 @@
   satb_mq_set.set_active_all_threads(true, /* new active value */
                                      false /* expected_active */);
 
+  _root_regions.prepare_for_scan();
+
   // update_g1_committed() will be called at the end of an evac pause
   // when marking is on. So, it's also called at the end of the
   // initial-mark pause to update the heap end, if the heap expands
@@ -1147,6 +1239,69 @@
   return 0;
 }
 
+void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
+  // Currently, only survivors can be root regions.
+  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
+  G1RootRegionScanClosure cl(_g1h, this, worker_id);
+
+  const uintx interval = PrefetchScanIntervalInBytes;
+  HeapWord* curr = hr->bottom();
+  const HeapWord* end = hr->top();
+  while (curr < end) {
+    Prefetch::read(curr, interval);
+    oop obj = oop(curr);
+    int size = obj->oop_iterate(&cl);
+    assert(size == obj->size(), "sanity");
+    curr += size;
+  }
+}
+
+class CMRootRegionScanTask : public AbstractGangTask {
+private:
+  ConcurrentMark* _cm;
+
+public:
+  CMRootRegionScanTask(ConcurrentMark* cm) :
+    AbstractGangTask("Root Region Scan"), _cm(cm) { }
+
+  void work(uint worker_id) {
+    assert(Thread::current()->is_ConcurrentGC_thread(),
+           "this should only be done by a conc GC thread");
+
+    CMRootRegions* root_regions = _cm->root_regions();
+    HeapRegion* hr = root_regions->claim_next();
+    while (hr != NULL) {
+      _cm->scanRootRegion(hr, worker_id);
+      hr = root_regions->claim_next();
+    }
+  }
+};
+
+void ConcurrentMark::scanRootRegions() {
+  // scan_in_progress() will have been set to true only if there was
+  // at least one root region to scan. So, if it's false, we
+  // should not attempt to do any further work.
+  if (root_regions()->scan_in_progress()) {
+    _parallel_marking_threads = calc_parallel_marking_threads();
+    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
+           "Maximum number of marking threads exceeded");
+    uint active_workers = MAX2(1U, parallel_marking_threads());
+
+    CMRootRegionScanTask task(this);
+    if (parallel_marking_threads() > 0) {
+      _parallel_workers->set_active_workers((int) active_workers);
+      _parallel_workers->run_task(&task);
+    } else {
+      task.work(0);
+    }
+
+    // It's possible that has_aborted() is true here without actually
+    // aborting the survivor scan earlier. This is OK as it's
+    // mainly used for sanity checking.
+    root_regions()->scan_finished();
+  }
+}
+
 void ConcurrentMark::markFromRoots() {
   // we might be tempted to assert that:
   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
@@ -1225,6 +1380,10 @@
       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
     }
   } else {
+    // Aggregate the per-task counting data that we have accumulated
+    // while marking.
+    aggregate_count_data();
+
     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
     // We're done with marking.
     // This is the end of  the marking cycle, we're expected all
@@ -1262,48 +1421,41 @@
   g1p->record_concurrent_mark_remark_end();
 }
 
-#define CARD_BM_TEST_MODE 0
-
+// Used to calculate the # live objects per region
+// for verification purposes
 class CalcLiveObjectsClosure: public HeapRegionClosure {
 
   CMBitMapRO* _bm;
   ConcurrentMark* _cm;
-  bool _changed;
-  bool _yield;
-  size_t _words_done;
-  size_t _tot_live;
-  size_t _tot_used;
-  size_t _regions_done;
-  double _start_vtime_sec;
-
   BitMap* _region_bm;
   BitMap* _card_bm;
+
+  // Debugging
+  size_t _tot_words_done;
+  size_t _tot_live;
+  size_t _tot_used;
+
+  size_t _region_marked_bytes;
+
   intptr_t _bottom_card_num;
-  bool _final;
 
   void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
-    for (intptr_t i = start_card_num; i <= last_card_num; i++) {
-#if CARD_BM_TEST_MODE
-      guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
-#else
-      _card_bm->par_at_put(i - _bottom_card_num, 1);
-#endif
+    assert(start_card_num <= last_card_num, "sanity");
+    BitMap::idx_t start_idx = start_card_num - _bottom_card_num;
+    BitMap::idx_t last_idx = last_card_num - _bottom_card_num;
+
+    for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
+      _card_bm->par_at_put(i, 1);
     }
   }
 
 public:
-  CalcLiveObjectsClosure(bool final,
-                         CMBitMapRO *bm, ConcurrentMark *cm,
+  CalcLiveObjectsClosure(CMBitMapRO *bm, ConcurrentMark *cm,
                          BitMap* region_bm, BitMap* card_bm) :
-    _bm(bm), _cm(cm), _changed(false), _yield(true),
-    _words_done(0), _tot_live(0), _tot_used(0),
-    _region_bm(region_bm), _card_bm(card_bm),_final(final),
-    _regions_done(0), _start_vtime_sec(0.0)
-  {
-    _bottom_card_num =
-      intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
-               CardTableModRefBS::card_shift);
-  }
+    _bm(bm), _cm(cm), _region_bm(region_bm), _card_bm(card_bm),
+    _region_marked_bytes(0), _tot_words_done(0),
+    _tot_live(0), _tot_used(0),
+    _bottom_card_num(cm->heap_bottom_card_num()) { }
 
   // It takes a region that's not empty (i.e., it has at least one
   // live object in it and sets its corresponding bit on the region
@@ -1319,29 +1471,16 @@
       _region_bm->par_at_put((BitMap::idx_t) index, true);
     } else {
       // Starts humongous case: calculate how many regions are part of
-      // this humongous region and then set the bit range. It might
-      // have been a bit more efficient to look at the object that
-      // spans these humongous regions to calculate their number from
-      // the object's size. However, it's a good idea to calculate
-      // this based on the metadata itself, and not the region
-      // contents, so that this code is not aware of what goes into
-      // the humongous regions (in case this changes in the future).
+      // this humongous region and then set the bit range.
       G1CollectedHeap* g1h = G1CollectedHeap::heap();
-      size_t end_index = index + 1;
-      while (end_index < g1h->n_regions()) {
-        HeapRegion* chr = g1h->region_at(end_index);
-        if (!chr->continuesHumongous()) break;
-        end_index += 1;
-      }
+      HeapRegion *last_hr = g1h->heap_region_containing_raw(hr->end() - 1);
+      size_t end_index = last_hr->hrs_index() + 1;
       _region_bm->par_at_put_range((BitMap::idx_t) index,
                                    (BitMap::idx_t) end_index, true);
     }
   }
 
   bool doHeapRegion(HeapRegion* hr) {
-    if (!_final && _regions_done == 0) {
-      _start_vtime_sec = os::elapsedVTime();
-    }
 
     if (hr->continuesHumongous()) {
       // We will ignore these here and process them when their
@@ -1355,48 +1494,41 @@
     }
 
     HeapWord* nextTop = hr->next_top_at_mark_start();
-    HeapWord* start   = hr->top_at_conc_mark_count();
-    assert(hr->bottom() <= start && start <= hr->end() &&
-           hr->bottom() <= nextTop && nextTop <= hr->end() &&
-           start <= nextTop,
-           "Preconditions.");
-    // Otherwise, record the number of word's we'll examine.
+    HeapWord* start   = hr->bottom();
+
+    assert(start <= hr->end() && start <= nextTop && nextTop <= hr->end(),
+           err_msg("Preconditions not met - "
+                   "start: "PTR_FORMAT", nextTop: "PTR_FORMAT", end: "PTR_FORMAT,
+                   start, nextTop, hr->end()));
+
+    // Record the number of word's we'll examine.
     size_t words_done = (nextTop - start);
+
     // Find the first marked object at or after "start".
     start = _bm->getNextMarkedWordAddress(start, nextTop);
+
     size_t marked_bytes = 0;
 
     // Below, the term "card num" means the result of shifting an address
     // by the card shift -- address 0 corresponds to card number 0.  One
     // must subtract the card num of the bottom of the heap to obtain a
     // card table index.
+
     // The first card num of the sequence of live cards currently being
     // constructed.  -1 ==> no sequence.
     intptr_t start_card_num = -1;
+
     // The last card num of the sequence of live cards currently being
     // constructed.  -1 ==> no sequence.
     intptr_t last_card_num = -1;
 
     while (start < nextTop) {
-      if (_yield && _cm->do_yield_check()) {
-        // We yielded.  It might be for a full collection, in which case
-        // all bets are off; terminate the traversal.
-        if (_cm->has_aborted()) {
-          _changed = false;
-          return true;
-        } else {
-          // Otherwise, it might be a collection pause, and the region
-          // we're looking at might be in the collection set.  We'll
-          // abandon this region.
-          return false;
-        }
-      }
       oop obj = oop(start);
       int obj_sz = obj->size();
+
       // The card num of the start of the current object.
       intptr_t obj_card_num =
         intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
-
       HeapWord* obj_last = start + obj_sz - 1;
       intptr_t obj_last_card_num =
         intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
@@ -1414,110 +1546,404 @@
             start_card_num = obj_card_num;
           }
         }
-#if CARD_BM_TEST_MODE
-        /*
-        gclog_or_tty->print_cr("Setting bits from %d/%d.",
-                               obj_card_num - _bottom_card_num,
-                               obj_last_card_num - _bottom_card_num);
-        */
-        for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
-          _card_bm->par_at_put(j - _bottom_card_num, 1);
-        }
-#endif
       }
       // In any case, we set the last card num.
       last_card_num = obj_last_card_num;
 
       marked_bytes += (size_t)obj_sz * HeapWordSize;
+
       // Find the next marked object after this one.
       start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
-      _changed = true;
     }
+
     // Handle the last range, if any.
     if (start_card_num != -1) {
       mark_card_num_range(start_card_num, last_card_num);
     }
-    if (_final) {
-      // Mark the allocated-since-marking portion...
-      HeapWord* tp = hr->top();
-      if (nextTop < tp) {
-        start_card_num =
-          intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
-        last_card_num =
-          intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
-        mark_card_num_range(start_card_num, last_card_num);
-        // This definitely means the region has live objects.
-        set_bit_for_region(hr);
-      }
+
+    // Mark the allocated-since-marking portion...
+    HeapWord* top = hr->top();
+    if (nextTop < top) {
+      start_card_num = intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
+      last_card_num = intptr_t(uintptr_t(top) >> CardTableModRefBS::card_shift);
+
+      mark_card_num_range(start_card_num, last_card_num);
+
+      // This definitely means the region has live objects.
+      set_bit_for_region(hr);
     }
 
-    hr->add_to_marked_bytes(marked_bytes);
     // Update the live region bitmap.
     if (marked_bytes > 0) {
       set_bit_for_region(hr);
     }
-    hr->set_top_at_conc_mark_count(nextTop);
+
+    // Set the marked bytes for the current region so that
+    // it can be queried by a calling verificiation routine
+    _region_marked_bytes = marked_bytes;
+
     _tot_live += hr->next_live_bytes();
     _tot_used += hr->used();
-    _words_done = words_done;
-
-    if (!_final) {
-      ++_regions_done;
-      if (_regions_done % 10 == 0) {
-        double end_vtime_sec = os::elapsedVTime();
-        double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
-        if (elapsed_vtime_sec > (10.0 / 1000.0)) {
-          jlong sleep_time_ms =
-            (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
-          os::sleep(Thread::current(), sleep_time_ms, false);
-          _start_vtime_sec = end_vtime_sec;
-        }
-      }
-    }
+    _tot_words_done = words_done;
 
     return false;
   }
 
-  bool changed() { return _changed;  }
-  void reset()   { _changed = false; _words_done = 0; }
-  void no_yield() { _yield = false; }
-  size_t words_done() { return _words_done; }
-  size_t tot_live() { return _tot_live; }
-  size_t tot_used() { return _tot_used; }
+  size_t region_marked_bytes() const { return _region_marked_bytes; }
+
+  // Debugging
+  size_t tot_words_done() const      { return _tot_words_done; }
+  size_t tot_live() const            { return _tot_live; }
+  size_t tot_used() const            { return _tot_used; }
+};
+
+// Heap region closure used for verifying the counting data
+// that was accumulated concurrently and aggregated during
+// the remark pause. This closure is applied to the heap
+// regions during the STW cleanup pause.
+
+class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
+  ConcurrentMark* _cm;
+  CalcLiveObjectsClosure _calc_cl;
+  BitMap* _region_bm;   // Region BM to be verified
+  BitMap* _card_bm;     // Card BM to be verified
+  bool _verbose;        // verbose output?
+
+  BitMap* _exp_region_bm; // Expected Region BM values
+  BitMap* _exp_card_bm;   // Expected card BM values
+
+  int _failures;
+
+public:
+  VerifyLiveObjectDataHRClosure(ConcurrentMark* cm,
+                                BitMap* region_bm,
+                                BitMap* card_bm,
+                                BitMap* exp_region_bm,
+                                BitMap* exp_card_bm,
+                                bool verbose) :
+    _cm(cm),
+    _calc_cl(_cm->nextMarkBitMap(), _cm, exp_region_bm, exp_card_bm),
+    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
+    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
+    _failures(0) { }
+
+  int failures() const { return _failures; }
+
+  bool doHeapRegion(HeapRegion* hr) {
+    if (hr->continuesHumongous()) {
+      // We will ignore these here and process them when their
+      // associated "starts humongous" region is processed (see
+      // set_bit_for_heap_region()). Note that we cannot rely on their
+      // associated "starts humongous" region to have their bit set to
+      // 1 since, due to the region chunking in the parallel region
+      // iteration, a "continues humongous" region might be visited
+      // before its associated "starts humongous".
+      return false;
+    }
+
+    int failures = 0;
+
+    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
+    // this region and set the corresponding bits in the expected region
+    // and card bitmaps.
+    bool res = _calc_cl.doHeapRegion(hr);
+    assert(res == false, "should be continuing");
+
+    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
+                    Mutex::_no_safepoint_check_flag);
+
+    // Verify that _top_at_conc_count == ntams
+    if (hr->top_at_conc_mark_count() != hr->next_top_at_mark_start()) {
+      if (_verbose) {
+        gclog_or_tty->print_cr("Region " SIZE_FORMAT ": top at conc count incorrect: "
+                               "expected " PTR_FORMAT ", actual: " PTR_FORMAT,
+                               hr->hrs_index(), hr->next_top_at_mark_start(),
+                               hr->top_at_conc_mark_count());
+      }
+      failures += 1;
+    }
+
+    // Verify the marked bytes for this region.
+    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
+    size_t act_marked_bytes = hr->next_marked_bytes();
+
+    // We're not OK if expected marked bytes > actual marked bytes. It means
+    // we have missed accounting some objects during the actual marking.
+    if (exp_marked_bytes > act_marked_bytes) {
+      if (_verbose) {
+        gclog_or_tty->print_cr("Region " SIZE_FORMAT ": marked bytes mismatch: "
+                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
+                               hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
+      }
+      failures += 1;
+    }
+
+    // Verify the bit, for this region, in the actual and expected
+    // (which was just calculated) region bit maps.
+    // We're not OK if the bit in the calculated expected region
+    // bitmap is set and the bit in the actual region bitmap is not.
+    BitMap::idx_t index = (BitMap::idx_t)hr->hrs_index();
+
+    bool expected = _exp_region_bm->at(index);
+    bool actual = _region_bm->at(index);
+    if (expected && !actual) {
+      if (_verbose) {
+        gclog_or_tty->print_cr("Region " SIZE_FORMAT ": region bitmap mismatch: "
+                               "expected: %d, actual: %d",
+                               hr->hrs_index(), expected, actual);
+      }
+      failures += 1;
+    }
+
+    // Verify that the card bit maps for the cards spanned by the current
+    // region match. We have an error if we have a set bit in the expected
+    // bit map and the corresponding bit in the actual bitmap is not set.
+
+    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
+    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
+
+    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
+      expected = _exp_card_bm->at(i);
+      actual = _card_bm->at(i);
+
+      if (expected && !actual) {
+        if (_verbose) {
+          gclog_or_tty->print_cr("Region " SIZE_FORMAT ": card bitmap mismatch at " SIZE_FORMAT ": "
+                                 "expected: %d, actual: %d",
+                                 hr->hrs_index(), i, expected, actual);
+        }
+        failures += 1;
+      }
+    }
+
+    if (failures > 0 && _verbose)  {
+      gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
+                             "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
+                             HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
+                             _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
+    }
+
+    _failures += failures;
+
+    // We could stop iteration over the heap when we
+    // find the first voilating region by returning true.
+    return false;
+  }
 };
 
 
-void ConcurrentMark::calcDesiredRegions() {
-  _region_bm.clear();
-  _card_bm.clear();
-  CalcLiveObjectsClosure calccl(false /*final*/,
-                                nextMarkBitMap(), this,
-                                &_region_bm, &_card_bm);
-  G1CollectedHeap *g1h = G1CollectedHeap::heap();
-  g1h->heap_region_iterate(&calccl);
-
-  do {
-    calccl.reset();
-    g1h->heap_region_iterate(&calccl);
-  } while (calccl.changed());
-}
+class G1ParVerifyFinalCountTask: public AbstractGangTask {
+protected:
+  G1CollectedHeap* _g1h;
+  ConcurrentMark* _cm;
+  BitMap* _actual_region_bm;
+  BitMap* _actual_card_bm;
+
+  uint    _n_workers;
+
+  BitMap* _expected_region_bm;
+  BitMap* _expected_card_bm;
+
+  int  _failures;
+  bool _verbose;
+
+public:
+  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
+                            BitMap* region_bm, BitMap* card_bm,
+                            BitMap* expected_region_bm, BitMap* expected_card_bm)
+    : AbstractGangTask("G1 verify final counting"),
+      _g1h(g1h), _cm(_g1h->concurrent_mark()),
+      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
+      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
+      _failures(0), _verbose(false),
+      _n_workers(0) {
+    assert(VerifyDuringGC, "don't call this otherwise");
+
+    // Use the value already set as the number of active threads
+    // in the call to run_task().
+    if (G1CollectedHeap::use_parallel_gc_threads()) {
+      assert( _g1h->workers()->active_workers() > 0,
+        "Should have been previously set");
+      _n_workers = _g1h->workers()->active_workers();
+    } else {
+      _n_workers = 1;
+    }
+
+    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
+    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
+
+    _verbose = _cm->verbose_medium();
+  }
+
+  void work(uint worker_id) {
+    assert(worker_id < _n_workers, "invariant");
+
+    VerifyLiveObjectDataHRClosure verify_cl(_cm,
+                                            _actual_region_bm, _actual_card_bm,
+                                            _expected_region_bm,
+                                            _expected_card_bm,
+                                            _verbose);
+
+    if (G1CollectedHeap::use_parallel_gc_threads()) {
+      _g1h->heap_region_par_iterate_chunked(&verify_cl,
+                                            worker_id,
+                                            _n_workers,
+                                            HeapRegion::VerifyCountClaimValue);
+    } else {
+      _g1h->heap_region_iterate(&verify_cl);
+    }
+
+    Atomic::add(verify_cl.failures(), &_failures);
+  }
+
+  int failures() const { return _failures; }
+};
+
+// Final update of count data (during cleanup).
+// Adds [top_at_count, NTAMS) to the marked bytes for each
+// region. Sets the bits in the card bitmap corresponding
+// to the interval [top_at_count, top], and sets the
+// liveness bit for each region containing live data
+// in the region bitmap.
+
+class FinalCountDataUpdateClosure: public HeapRegionClosure {
+  ConcurrentMark* _cm;
+  BitMap* _region_bm;
+  BitMap* _card_bm;
+
+  size_t _total_live_bytes;
+  size_t _total_used_bytes;
+  size_t _total_words_done;
+
+  void set_card_bitmap_range(BitMap::idx_t start_idx, BitMap::idx_t last_idx) {
+    assert(start_idx <= last_idx, "sanity");
+
+    // Set the inclusive bit range [start_idx, last_idx].
+    // For small ranges (up to 8 cards) use a simple loop; otherwise
+    // use par_at_put_range.
+    if ((last_idx - start_idx) <= 8) {
+      for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
+        _card_bm->par_set_bit(i);
+      }
+    } else {
+      assert(last_idx < _card_bm->size(), "sanity");
+      // Note BitMap::par_at_put_range() is exclusive.
+      _card_bm->par_at_put_range(start_idx, last_idx+1, true);
+    }
+  }
+
+  // It takes a region that's not empty (i.e., it has at least one
+  // live object in it and sets its corresponding bit on the region
+  // bitmap to 1. If the region is "starts humongous" it will also set
+  // to 1 the bits on the region bitmap that correspond to its
+  // associated "continues humongous" regions.
+  void set_bit_for_region(HeapRegion* hr) {
+    assert(!hr->continuesHumongous(), "should have filtered those out");
+
+    size_t index = hr->hrs_index();
+    if (!hr->startsHumongous()) {
+      // Normal (non-humongous) case: just set the bit.
+      _region_bm->par_set_bit((BitMap::idx_t) index);
+    } else {
+      // Starts humongous case: calculate how many regions are part of
+      // this humongous region and then set the bit range.
+      G1CollectedHeap* g1h = G1CollectedHeap::heap();
+      HeapRegion *last_hr = g1h->heap_region_containing_raw(hr->end() - 1);
+      size_t end_index = last_hr->hrs_index() + 1;
+      _region_bm->par_at_put_range((BitMap::idx_t) index,
+                                   (BitMap::idx_t) end_index, true);
+    }
+  }
+
+ public:
+  FinalCountDataUpdateClosure(ConcurrentMark* cm,
+                              BitMap* region_bm,
+                              BitMap* card_bm) :
+    _cm(cm), _region_bm(region_bm), _card_bm(card_bm),
+    _total_words_done(0), _total_live_bytes(0), _total_used_bytes(0) { }
+
+  bool doHeapRegion(HeapRegion* hr) {
+
+    if (hr->continuesHumongous()) {
+      // We will ignore these here and process them when their
+      // associated "starts humongous" region is processed (see
+      // set_bit_for_heap_region()). Note that we cannot rely on their
+      // associated "starts humongous" region to have their bit set to
+      // 1 since, due to the region chunking in the parallel region
+      // iteration, a "continues humongous" region might be visited
+      // before its associated "starts humongous".
+      return false;
+    }
+
+    HeapWord* start = hr->top_at_conc_mark_count();
+    HeapWord* ntams = hr->next_top_at_mark_start();
+    HeapWord* top   = hr->top();
+
+    assert(hr->bottom() <= start && start <= hr->end() &&
+           hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
+
+    size_t words_done = ntams - hr->bottom();
+
+    if (start < ntams) {
+      // Region was changed between remark and cleanup pauses
+      // We need to add (ntams - start) to the marked bytes
+      // for this region, and set bits for the range
+      // [ card_idx(start), card_idx(ntams) ) in the card bitmap.
+      size_t live_bytes = (ntams - start) * HeapWordSize;
+      hr->add_to_marked_bytes(live_bytes);
+
+      // Record the new top at conc count
+      hr->set_top_at_conc_mark_count(ntams);
+
+      // The setting of the bits in the card bitmap takes place below
+    }
+
+    // Mark the allocated-since-marking portion...
+    if (ntams < top) {
+      // This definitely means the region has live objects.
+      set_bit_for_region(hr);
+    }
+
+    // Now set the bits for [start, top]
+    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
+    BitMap::idx_t last_idx = _cm->card_bitmap_index_for(top);
+    set_card_bitmap_range(start_idx, last_idx);
+
+    // Set the bit for the region if it contains live data
+    if (hr->next_marked_bytes() > 0) {
+      set_bit_for_region(hr);
+    }
+
+    _total_words_done += words_done;
+    _total_used_bytes += hr->used();
+    _total_live_bytes += hr->next_marked_bytes();
+
+    return false;
+  }
+
+  size_t total_words_done() const { return _total_words_done; }
+  size_t total_live_bytes() const { return _total_live_bytes; }
+  size_t total_used_bytes() const { return _total_used_bytes; }
+};
 
 class G1ParFinalCountTask: public AbstractGangTask {
 protected:
   G1CollectedHeap* _g1h;
-  CMBitMap* _bm;
+  ConcurrentMark* _cm;
+  BitMap* _actual_region_bm;
+  BitMap* _actual_card_bm;
+
   uint    _n_workers;
+
   size_t *_live_bytes;
   size_t *_used_bytes;
-  BitMap* _region_bm;
-  BitMap* _card_bm;
+
 public:
-  G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
-                      BitMap* region_bm, BitMap* card_bm)
-    : AbstractGangTask("G1 final counting"), _g1h(g1h),
-    _bm(bm), _region_bm(region_bm), _card_bm(card_bm),
-    _n_workers(0)
-  {
+  G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
+    : AbstractGangTask("G1 final counting"),
+      _g1h(g1h), _cm(_g1h->concurrent_mark()),
+      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
+      _n_workers(0) {
     // Use the value already set as the number of active threads
     // in the call to run_task().  Needed for the allocation of
     // _live_bytes and _used_bytes.
@@ -1539,29 +1965,32 @@
   }
 
   void work(uint worker_id) {
-    CalcLiveObjectsClosure calccl(true /*final*/,
-                                  _bm, _g1h->concurrent_mark(),
-                                  _region_bm, _card_bm);
-    calccl.no_yield();
+    assert(worker_id < _n_workers, "invariant");
+
+    FinalCountDataUpdateClosure final_update_cl(_cm,
+                                                _actual_region_bm,
+                                                _actual_card_bm);
+
     if (G1CollectedHeap::use_parallel_gc_threads()) {
-      _g1h->heap_region_par_iterate_chunked(&calccl, worker_id,
-                                            (int) _n_workers,
+      _g1h->heap_region_par_iterate_chunked(&final_update_cl,
+                                            worker_id,
+                                            _n_workers,
                                             HeapRegion::FinalCountClaimValue);
     } else {
-      _g1h->heap_region_iterate(&calccl);
+      _g1h->heap_region_iterate(&final_update_cl);
     }
-    assert(calccl.complete(), "Shouldn't have yielded!");
-
-    assert(worker_id < _n_workers, "invariant");
-    _live_bytes[worker_id] = calccl.tot_live();
-    _used_bytes[worker_id] = calccl.tot_used();
-  }
+
+    _live_bytes[worker_id] = final_update_cl.total_live_bytes();
+    _used_bytes[worker_id] = final_update_cl.total_used_bytes();
+  }
+
   size_t live_bytes()  {
     size_t live_bytes = 0;
     for (uint i = 0; i < _n_workers; ++i)
       live_bytes += _live_bytes[i];
     return live_bytes;
   }
+
   size_t used_bytes()  {
     size_t used_bytes = 0;
     for (uint i = 0; i < _n_workers; ++i)
@@ -1724,8 +2153,7 @@
   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
                        BitMap* region_bm, BitMap* card_bm) :
     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
-    _region_bm(region_bm), _card_bm(card_bm)
-  {}
+    _region_bm(region_bm), _card_bm(card_bm) { }
 
   void work(uint worker_id) {
     if (G1CollectedHeap::use_parallel_gc_threads()) {
@@ -1772,11 +2200,10 @@
   uint n_workers;
 
   // Do counting once more with the world stopped for good measure.
-  G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
-                                        &_region_bm, &_card_bm);
+  G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
+
   if (G1CollectedHeap::use_parallel_gc_threads()) {
-    assert(g1h->check_heap_region_claim_values(
-                                               HeapRegion::InitialClaimValue),
+   assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
            "sanity check");
 
     g1h->set_par_threads();
@@ -1787,14 +2214,42 @@
     // Done with the parallel phase so reset to 0.
     g1h->set_par_threads(0);
 
-    assert(g1h->check_heap_region_claim_values(
-                                             HeapRegion::FinalCountClaimValue),
+    assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
            "sanity check");
   } else {
     n_workers = 1;
     g1_par_count_task.work(0);
   }
 
+  if (VerifyDuringGC) {
+    // Verify that the counting data accumulated during marking matches
+    // that calculated by walking the marking bitmap.
+
+    // Bitmaps to hold expected values
+    BitMap expected_region_bm(_region_bm.size(), false);
+    BitMap expected_card_bm(_card_bm.size(), false);
+
+    G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
+                                                 &_region_bm,
+                                                 &_card_bm,
+                                                 &expected_region_bm,
+                                                 &expected_card_bm);
+
+    if (G1CollectedHeap::use_parallel_gc_threads()) {
+      g1h->set_par_threads((int)n_workers);
+      g1h->workers()->run_task(&g1_par_verify_task);
+      // Done with the parallel phase so reset to 0.
+      g1h->set_par_threads(0);
+
+      assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
+             "sanity check");
+    } else {
+      g1_par_verify_task.work(0);
+    }
+
+    guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
+  }
+
   size_t known_garbage_bytes =
     g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
   g1p->set_known_garbage_bytes(known_garbage_bytes);
@@ -1905,6 +2360,10 @@
   // races with it goes around and waits for completeCleanup to finish.
   g1h->increment_total_collections();
 
+  // We reclaimed old regions so we should calculate the sizes to make
+  // sure we update the old gen/space data.
+  g1h->g1mm()->update_sizes();
+
   if (VerifyDuringGC) {
     HandleMark hm;  // handle scope
     gclog_or_tty->print(" VerifyDuringGC:(after)");
@@ -1983,12 +2442,11 @@
 class G1CMKeepAliveClosure: public OopClosure {
   G1CollectedHeap* _g1;
   ConcurrentMark*  _cm;
-  CMBitMap*        _bitMap;
  public:
-  G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
-                       CMBitMap* bitMap) :
-    _g1(g1), _cm(cm),
-    _bitMap(bitMap) {}
+  G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) :
+    _g1(g1), _cm(cm) {
+    assert(Thread::current()->is_VM_thread(), "otherwise fix worker id");
+  }
 
   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
   virtual void do_oop(      oop* p) { do_oop_work(p); }
@@ -2004,26 +2462,25 @@
     }
 
     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
-      _bitMap->mark(addr);
+      _cm->mark_and_count(obj);
       _cm->mark_stack_push(obj);
     }
   }
 };
 
 class G1CMDrainMarkingStackClosure: public VoidClosure {
+  ConcurrentMark*               _cm;
   CMMarkStack*                  _markStack;
-  CMBitMap*                     _bitMap;
   G1CMKeepAliveClosure*         _oopClosure;
  public:
-  G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
+  G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack,
                                G1CMKeepAliveClosure* oopClosure) :
-    _bitMap(bitMap),
+    _cm(cm),
     _markStack(markStack),
-    _oopClosure(oopClosure)
-  {}
+    _oopClosure(oopClosure) { }
 
   void do_void() {
-    _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
+    _markStack->drain((OopClosure*)_oopClosure, _cm->nextMarkBitMap(), false);
   }
 };
 
@@ -2102,8 +2559,7 @@
   CMTask* _task;
  public:
   G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
-    _cm(cm), _task(task)
-  {}
+    _cm(cm), _task(task) { }
 
   void do_void() {
     do {
@@ -2242,9 +2698,9 @@
     rp->setup_policy(clear_all_soft_refs);
     assert(_markStack.isEmpty(), "mark stack should be empty");
 
-    G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap());
+    G1CMKeepAliveClosure g1_keep_alive(g1h, this);
     G1CMDrainMarkingStackClosure
-      g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);
+      g1_drain_mark_stack(this, &_markStack, &g1_keep_alive);
 
     // We use the work gang from the G1CollectedHeap and we utilize all
     // the worker threads.
@@ -2616,18 +3072,6 @@
 // during an evacuation pause). This was a late change to the code and
 // is currently not being taken advantage of.
 
-class CMGlobalObjectClosure : public ObjectClosure {
-private:
-  ConcurrentMark* _cm;
-
-public:
-  void do_object(oop obj) {
-    _cm->deal_with_reference(obj);
-  }
-
-  CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
-};
-
 void ConcurrentMark::deal_with_reference(oop obj) {
   if (verbose_high()) {
     gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
@@ -2672,6 +3116,18 @@
   }
 }
 
+class CMGlobalObjectClosure : public ObjectClosure {
+private:
+  ConcurrentMark* _cm;
+
+public:
+  void do_object(oop obj) {
+    _cm->deal_with_reference(obj);
+  }
+
+  CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
+};
+
 void ConcurrentMark::drainAllSATBBuffers() {
   guarantee(false, "drainAllSATBBuffers(): don't call this any more");
 
@@ -2693,15 +3149,6 @@
   assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
 }
 
-void ConcurrentMark::clear(oop p) {
-  assert(p != NULL && p->is_oop(), "expected an oop");
-  HeapWord* addr = (HeapWord*)p;
-  assert(addr >= _nextMarkBitMap->startWord() ||
-         addr < _nextMarkBitMap->endWord(), "in a region");
-
-  _nextMarkBitMap->clear(addr);
-}
-
 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
   // Note we are overriding the read-only view of the prev map here, via
   // the cast.
@@ -3015,6 +3462,192 @@
   }
 }
 
+// Aggregate the counting data that was constructed concurrently
+// with marking.
+class AggregateCountDataHRClosure: public HeapRegionClosure {
+  ConcurrentMark* _cm;
+  BitMap* _cm_card_bm;
+  size_t _max_task_num;
+
+ public:
+  AggregateCountDataHRClosure(ConcurrentMark *cm,
+                              BitMap* cm_card_bm,
+                              size_t max_task_num) :
+    _cm(cm), _cm_card_bm(cm_card_bm),
+    _max_task_num(max_task_num) { }
+
+  bool is_card_aligned(HeapWord* p) {
+    return ((uintptr_t(p) & (CardTableModRefBS::card_size - 1)) == 0);
+  }
+
+  bool doHeapRegion(HeapRegion* hr) {
+    if (hr->continuesHumongous()) {
+      // We will ignore these here and process them when their
+      // associated "starts humongous" region is processed.
+      // Note that we cannot rely on their associated
+      // "starts humongous" region to have their bit set to 1
+      // since, due to the region chunking in the parallel region
+      // iteration, a "continues humongous" region might be visited
+      // before its associated "starts humongous".
+      return false;
+    }
+
+    HeapWord* start = hr->bottom();
+    HeapWord* limit = hr->next_top_at_mark_start();
+    HeapWord* end = hr->end();
+
+    assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
+           err_msg("Preconditions not met - "
+                   "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
+                   "top: "PTR_FORMAT", end: "PTR_FORMAT,
+                   start, limit, hr->top(), hr->end()));
+
+    assert(hr->next_marked_bytes() == 0, "Precondition");
+
+    if (start == limit) {
+      // NTAMS of this region has not been set so nothing to do.
+      return false;
+    }
+
+    assert(is_card_aligned(start), "sanity");
+    assert(is_card_aligned(end), "sanity");
+
+    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
+    BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
+    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
+
+    // If ntams is not card aligned then we bump the index for
+    // limit so that we get the card spanning ntams.
+    if (!is_card_aligned(limit)) {
+      limit_idx += 1;
+    }
+
+    assert(limit_idx <= end_idx, "or else use atomics");
+
+    // Aggregate the "stripe" in the count data associated with hr.
+    size_t hrs_index = hr->hrs_index();
+    size_t marked_bytes = 0;
+
+    for (int i = 0; (size_t)i < _max_task_num; i += 1) {
+      size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
+      BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
+
+      // Fetch the marked_bytes in this region for task i and
+      // add it to the running total for this region.
+      marked_bytes += marked_bytes_array[hrs_index];
+
+      // Now union the bitmaps[0,max_task_num)[start_idx..limit_idx)
+      // into the global card bitmap.
+      BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
+
+      while (scan_idx < limit_idx) {
+        assert(task_card_bm->at(scan_idx) == true, "should be");
+        _cm_card_bm->set_bit(scan_idx);
+        assert(_cm_card_bm->at(scan_idx) == true, "should be");
+
+        // BitMap::get_next_one_offset() can handle the case when
+        // its left_offset parameter is greater than its right_offset
+        // parameter. If does, however, have an early exit if
+        // left_offset == right_offset. So let's limit the value
+        // passed in for left offset here.
+        BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
+        scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
+      }
+    }
+
+    // Update the marked bytes for this region.
+    hr->add_to_marked_bytes(marked_bytes);
+
+    // Now set the top at count to NTAMS.
+    hr->set_top_at_conc_mark_count(limit);
+
+    // Next heap region
+    return false;
+  }
+};
+
+class G1AggregateCountDataTask: public AbstractGangTask {
+protected:
+  G1CollectedHeap* _g1h;
+  ConcurrentMark* _cm;
+  BitMap* _cm_card_bm;
+  size_t _max_task_num;
+  int _active_workers;
+
+public:
+  G1AggregateCountDataTask(G1CollectedHeap* g1h,
+                           ConcurrentMark* cm,
+                           BitMap* cm_card_bm,
+                           size_t max_task_num,
+                           int n_workers) :
+    AbstractGangTask("Count Aggregation"),
+    _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
+    _max_task_num(max_task_num),
+    _active_workers(n_workers) { }
+
+  void work(uint worker_id) {
+    AggregateCountDataHRClosure cl(_cm, _cm_card_bm, _max_task_num);
+
+    if (G1CollectedHeap::use_parallel_gc_threads()) {
+      _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
+                                            _active_workers,
+                                            HeapRegion::AggregateCountClaimValue);
+    } else {
+      _g1h->heap_region_iterate(&cl);
+    }
+  }
+};
+
+
+void ConcurrentMark::aggregate_count_data() {
+  int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
+                        _g1h->workers()->active_workers() :
+                        1);
+
+  G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
+                                           _max_task_num, n_workers);
+
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
+    assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
+           "sanity check");
+    _g1h->set_par_threads(n_workers);
+    _g1h->workers()->run_task(&g1_par_agg_task);
+    _g1h->set_par_threads(0);
+
+    assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
+           "sanity check");
+    _g1h->reset_heap_region_claim_values();
+  } else {
+    g1_par_agg_task.work(0);
+  }
+}
+
+// Clear the per-worker arrays used to store the per-region counting data
+void ConcurrentMark::clear_all_count_data() {
+  // Clear the global card bitmap - it will be filled during
+  // liveness count aggregation (during remark) and the
+  // final counting task.
+  _card_bm.clear();
+
+  // Clear the global region bitmap - it will be filled as part
+  // of the final counting task.
+  _region_bm.clear();
+
+  size_t max_regions = _g1h->max_regions();
+  assert(_max_task_num != 0, "unitialized");
+
+  for (int i = 0; (size_t) i < _max_task_num; i += 1) {
+    BitMap* task_card_bm = count_card_bitmap_for(i);
+    size_t* marked_bytes_array = count_marked_bytes_array_for(i);
+
+    assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
+    assert(marked_bytes_array != NULL, "uninitialized");
+
+    memset(marked_bytes_array, 0, (max_regions * sizeof(size_t)));
+    task_card_bm->clear();
+  }
+}
+
 void ConcurrentMark::print_stats() {
   if (verbose_stats()) {
     gclog_or_tty->print_cr("---------------------------------------------------------------------");
@@ -3350,6 +3983,8 @@
 void ConcurrentMark::abort() {
   // Clear all marks to force marking thread to do nothing
   _nextMarkBitMap->clearAll();
+  // Clear the liveness counting data
+  clear_all_count_data();
   // Empty mark stack
   clear_marking_state();
   for (int i = 0; i < (int)_max_task_num; ++i) {
@@ -3402,23 +4037,15 @@
                          (_init_times.sum() + _remark_times.sum() +
                           _cleanup_times.sum())/1000.0);
   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
-                "(%8.2f s marking, %8.2f s counting).",
+                "(%8.2f s marking).",
                 cmThread()->vtime_accum(),
-                cmThread()->vtime_mark_accum(),
-                cmThread()->vtime_count_accum());
+                cmThread()->vtime_mark_accum());
 }
 
 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
   _parallel_workers->print_worker_threads_on(st);
 }
 
-// Closures
-// XXX: there seems to be a lot of code  duplication here;
-// should refactor and consolidate the shared code.
-
-// This closure is used to mark refs into the CMS generation in
-// the CMS bit map. Called at the first checkpoint.
-
 // We take a break if someone is trying to stop the world.
 bool ConcurrentMark::do_yield_check(uint worker_id) {
   if (should_yield()) {
@@ -4704,6 +5331,8 @@
 
 CMTask::CMTask(int task_id,
                ConcurrentMark* cm,
+               size_t* marked_bytes,
+               BitMap* card_bm,
                CMTaskQueue* task_queue,
                CMTaskQueueSet* task_queues)
   : _g1h(G1CollectedHeap::heap()),
@@ -4713,7 +5342,9 @@
     _task_queue(task_queue),
     _task_queues(task_queues),
     _cm_oop_closure(NULL),
-    _aborted_region(MemRegion()) {
+    _aborted_region(MemRegion()),
+    _marked_bytes_array(marked_bytes),
+    _card_bm(card_bm) {
   guarantee(task_queue != NULL, "invariant");
   guarantee(task_queues != NULL, "invariant");
 
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Fri Feb 03 14:04:59 2012 -0500
@@ -84,8 +84,8 @@
   }
 
   // iteration
-  bool iterate(BitMapClosure* cl) { return _bm.iterate(cl); }
-  bool iterate(BitMapClosure* cl, MemRegion mr);
+  inline bool iterate(BitMapClosure* cl, MemRegion mr);
+  inline bool iterate(BitMapClosure* cl);
 
   // Return the address corresponding to the next marked bit at or after
   // "addr", and before "limit", if "limit" is non-NULL.  If there is no
@@ -349,10 +349,62 @@
   high_verbose       // per object verbose
 } CMVerboseLevel;
 
+class YoungList;
+
+// Root Regions are regions that are not empty at the beginning of a
+// marking cycle and which we might collect during an evacuation pause
+// while the cycle is active. Given that, during evacuation pauses, we
+// do not copy objects that are explicitly marked, what we have to do
+// for the root regions is to scan them and mark all objects reachable
+// from them. According to the SATB assumptions, we only need to visit
+// each object once during marking. So, as long as we finish this scan
+// before the next evacuation pause, we can copy the objects from the
+// root regions without having to mark them or do anything else to them.
+//
+// Currently, we only support root region scanning once (at the start
+// of the marking cycle) and the root regions are all the survivor
+// regions populated during the initial-mark pause.
+class CMRootRegions VALUE_OBJ_CLASS_SPEC {
+private:
+  YoungList*           _young_list;
+  ConcurrentMark*      _cm;
+
+  volatile bool        _scan_in_progress;
+  volatile bool        _should_abort;
+  HeapRegion* volatile _next_survivor;
+
+public:
+  CMRootRegions();
+  // We actually do most of the initialization in this method.
+  void init(G1CollectedHeap* g1h, ConcurrentMark* cm);
+
+  // Reset the claiming / scanning of the root regions.
+  void prepare_for_scan();
+
+  // Forces get_next() to return NULL so that the iteration aborts early.
+  void abort() { _should_abort = true; }
+
+  // Return true if the CM thread are actively scanning root regions,
+  // false otherwise.
+  bool scan_in_progress() { return _scan_in_progress; }
+
+  // Claim the next root region to scan atomically, or return NULL if
+  // all have been claimed.
+  HeapRegion* claim_next();
+
+  // Flag that we're done with root region scanning and notify anyone
+  // who's waiting on it. If aborted is false, assume that all regions
+  // have been claimed.
+  void scan_finished();
+
+  // If CM threads are still scanning root regions, wait until they
+  // are done. Return true if we had to wait, false otherwise.
+  bool wait_until_scan_finished();
+};
 
 class ConcurrentMarkThread;
 
-class ConcurrentMark: public CHeapObj {
+class ConcurrentMark : public CHeapObj {
   friend class ConcurrentMarkThread;
   friend class CMTask;
   friend class CMBitMapClosure;
@@ -386,7 +438,7 @@
 
   FreeRegionList        _cleanup_list;
 
-  // CMS marking support structures
+  // Concurrent marking support structures
   CMBitMap                _markBitMap1;
   CMBitMap                _markBitMap2;
   CMBitMapRO*             _prevMarkBitMap; // completed mark bitmap
@@ -400,6 +452,9 @@
   HeapWord*               _heap_start;
   HeapWord*               _heap_end;
 
+  // Root region tracking and claiming.
+  CMRootRegions           _root_regions;
+
   // For gray objects
   CMMarkStack             _markStack; // Grey objects behind global finger.
   CMRegionStack           _regionStack; // Grey regions behind global finger.
@@ -426,7 +481,6 @@
   WorkGangBarrierSync     _first_overflow_barrier_sync;
   WorkGangBarrierSync     _second_overflow_barrier_sync;
 
-
   // this is set by any task, when an overflow on the global data
   // structures is detected.
   volatile bool           _has_overflown;
@@ -554,9 +608,9 @@
   bool has_overflown()           { return _has_overflown; }
   void set_has_overflown()       { _has_overflown = true; }
   void clear_has_overflown()     { _has_overflown = false; }
+  bool restart_for_overflow()    { return _restart_for_overflow; }
 
   bool has_aborted()             { return _has_aborted; }
-  bool restart_for_overflow()    { return _restart_for_overflow; }
 
   // Methods to enter the two overflow sync barriers
   void enter_first_sync_barrier(int task_num);
@@ -578,6 +632,27 @@
     }
   }
 
+  // Live Data Counting data structures...
+  // These data structures are initialized at the start of
+  // marking. They are written to while marking is active.
+  // They are aggregated during remark; the aggregated values
+  // are then used to populate the _region_bm, _card_bm, and
+  // the total live bytes, which are then subsequently updated
+  // during cleanup.
+
+  // An array of bitmaps (one bit map per task). Each bitmap
+  // is used to record the cards spanned by the live objects
+  // marked by that task/worker.
+  BitMap*  _count_card_bitmaps;
+
+  // Used to record the number of marked live bytes
+  // (for each region, by worker thread).
+  size_t** _count_marked_bytes;
+
+  // Card index of the bottom of the G1 heap. Used for biasing indices into
+  // the card bitmaps.
+  intptr_t _heap_bottom_card_num;
+
 public:
   // Manipulation of the global mark stack.
   // Notice that the first mark_stack_push is CAS-based, whereas the
@@ -671,6 +746,8 @@
   // Returns true if there are any aborted memory regions.
   bool has_aborted_regions();
 
+  CMRootRegions* root_regions() { return &_root_regions; }
+
   bool concurrent_marking_in_progress() {
     return _concurrent_marking_in_progress;
   }
@@ -703,6 +780,7 @@
 
   ConcurrentMark(ReservedSpace rs, int max_regions);
   ~ConcurrentMark();
+
   ConcurrentMarkThread* cmThread() { return _cmThread; }
 
   CMBitMapRO* prevMarkBitMap() const { return _prevMarkBitMap; }
@@ -720,8 +798,17 @@
   // G1CollectedHeap
 
   // This notifies CM that a root during initial-mark needs to be
-  // grayed. It is MT-safe.
-  inline void grayRoot(oop obj, size_t word_size);
+  // grayed. It is MT-safe. word_size is the size of the object in
+  // words. It is passed explicitly as sometimes we cannot calculate
+  // it from the given object because it might be in an inconsistent
+  // state (e.g., in to-space and being copied). So the caller is
+  // responsible for dealing with this issue (e.g., get the size from
+  // the from-space image when the to-space image might be
+  // inconsistent) and always passing the size. hr is the region that
+  // contains the object and it's passed optionally from callers who
+  // might already have it (no point in recalculating it).
+  inline void grayRoot(oop obj, size_t word_size,
+                       uint worker_id, HeapRegion* hr = NULL);
 
   // It's used during evacuation pauses to gray a region, if
   // necessary, and it's MT-safe. It assumes that the caller has
@@ -772,6 +859,13 @@
   void checkpointRootsInitialPre();
   void checkpointRootsInitialPost();
 
+  // Scan all the root regions and mark everything reachable from
+  // them.
+  void scanRootRegions();
+
+  // Scan a single root region and mark everything reachable from it.
+  void scanRootRegion(HeapRegion* hr, uint worker_id);
+
   // Do concurrent phase of marking, to a tentative transitive closure.
   void markFromRoots();
 
@@ -781,15 +875,13 @@
 
   void checkpointRootsFinal(bool clear_all_soft_refs);
   void checkpointRootsFinalWork();
-  void calcDesiredRegions();
   void cleanup();
   void completeCleanup();
 
   // Mark in the previous bitmap.  NB: this is usually read-only, so use
   // this carefully!
   inline void markPrev(oop p);
-  inline void markNext(oop p);
-  void clear(oop p);
+
   // Clears marks for all objects in the given range, for the prev,
   // next, or both bitmaps.  NB: the previous bitmap is usually
   // read-only, so use this carefully!
@@ -913,6 +1005,114 @@
   bool verbose_high() {
     return _MARKING_VERBOSE_ && _verbose_level >= high_verbose;
   }
+
+  // Counting data structure accessors
+
+  // Returns the card number of the bottom of the G1 heap.
+  // Used in biasing indices into accounting card bitmaps.
+  intptr_t heap_bottom_card_num() const {
+    return _heap_bottom_card_num;
+  }
+
+  // Returns the card bitmap for a given task or worker id.
+  BitMap* count_card_bitmap_for(uint worker_id) {
+    assert(0 <= worker_id && worker_id < _max_task_num, "oob");
+    assert(_count_card_bitmaps != NULL, "uninitialized");
+    BitMap* task_card_bm = &_count_card_bitmaps[worker_id];
+    assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
+    return task_card_bm;
+  }
+
+  // Returns the array containing the marked bytes for each region,
+  // for the given worker or task id.
+  size_t* count_marked_bytes_array_for(uint worker_id) {
+    assert(0 <= worker_id && worker_id < _max_task_num, "oob");
+    assert(_count_marked_bytes != NULL, "uninitialized");
+    size_t* marked_bytes_array = _count_marked_bytes[worker_id];
+    assert(marked_bytes_array != NULL, "uninitialized");
+    return marked_bytes_array;
+  }
+
+  // Returns the index in the liveness accounting card table bitmap
+  // for the given address
+  inline BitMap::idx_t card_bitmap_index_for(HeapWord* addr);
+
+  // Counts the size of the given memory region in the the given
+  // marked_bytes array slot for the given HeapRegion.
+  // Sets the bits in the given card bitmap that are associated with the
+  // cards that are spanned by the memory region.
+  inline void count_region(MemRegion mr, HeapRegion* hr,
+                           size_t* marked_bytes_array,
+                           BitMap* task_card_bm);
+
+  // Counts the given memory region in the task/worker counting
+  // data structures for the given worker id.
+  inline void count_region(MemRegion mr, HeapRegion* hr, uint worker_id);
+
+  // Counts the given memory region in the task/worker counting
+  // data structures for the given worker id.
+  inline void count_region(MemRegion mr, uint worker_id);
+
+  // Counts the given object in the given task/worker counting
+  // data structures.
+  inline void count_object(oop obj, HeapRegion* hr,
+                           size_t* marked_bytes_array,
+                           BitMap* task_card_bm);
+
+  // Counts the given object in the task/worker counting data
+  // structures for the given worker id.
+  inline void count_object(oop obj, HeapRegion* hr, uint worker_id);
+
+  // Attempts to mark the given object and, if successful, counts
+  // the object in the given task/worker counting structures.
+  inline bool par_mark_and_count(oop obj, HeapRegion* hr,
+                                 size_t* marked_bytes_array,
+                                 BitMap* task_card_bm);
+
+  // Attempts to mark the given object and, if successful, counts
+  // the object in the task/worker counting structures for the
+  // given worker id.
+  inline bool par_mark_and_count(oop obj, size_t word_size,
+                                 HeapRegion* hr, uint worker_id);
+
+  // Attempts to mark the given object and, if successful, counts
+  // the object in the task/worker counting structures for the
+  // given worker id.
+  inline bool par_mark_and_count(oop obj, HeapRegion* hr, uint worker_id);
+
+  // Similar to the above routine but we don't know the heap region that
+  // contains the object to be marked/counted, which this routine looks up.
+  inline bool par_mark_and_count(oop obj, uint worker_id);
+
+  // Similar to the above routine but there are times when we cannot
+  // safely calculate the size of obj due to races and we, therefore,
+  // pass the size in as a parameter. It is the caller's reponsibility
+  // to ensure that the size passed in for obj is valid.
+  inline bool par_mark_and_count(oop obj, size_t word_size, uint worker_id);
+
+  // Unconditionally mark the given object, and unconditinally count
+  // the object in the counting structures for worker id 0.
+  // Should *not* be called from parallel code.
+  inline bool mark_and_count(oop obj, HeapRegion* hr);
+
+  // Similar to the above routine but we don't know the heap region that
+  // contains the object to be marked/counted, which this routine looks up.
+  // Should *not* be called from parallel code.
+  inline bool mark_and_count(oop obj);
+
+protected:
+  // Clear all the per-task bitmaps and arrays used to store the
+  // counting data.
+  void clear_all_count_data();
+
+  // Aggregates the counting data for each worker/task
+  // that was constructed while marking. Also sets
+  // the amount of marked bytes for each region and
+  // the top at concurrent mark count.
+  void aggregate_count_data();
+
+  // Verification routine
+  void verify_count_data();
 };
 
 // A class representing a marking task.
@@ -1031,6 +1231,12 @@
 
   TruncatedSeq                _marking_step_diffs_ms;
 
+  // Counting data structures. Embedding the task's marked_bytes_array
+  // and card bitmap into the actual task saves having to go through
+  // the ConcurrentMark object.
+  size_t*                     _marked_bytes_array;
+  BitMap*                     _card_bm;
+
   // LOTS of statistics related with this task
 #if _MARKING_STATS_
   NumberSeq                   _all_clock_intervals_ms;
@@ -1196,6 +1402,7 @@
   }
 
   CMTask(int task_num, ConcurrentMark *cm,
+         size_t* marked_bytes, BitMap* card_bm,
          CMTaskQueue* task_queue, CMTaskQueueSet* task_queues);
 
   // it prints statistics associated with this task
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp	Fri Feb 03 14:04:59 2012 -0500
@@ -28,6 +28,214 @@
 #include "gc_implementation/g1/concurrentMark.hpp"
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 
+// Returns the index in the liveness accounting card bitmap
+// for the given address
+inline BitMap::idx_t ConcurrentMark::card_bitmap_index_for(HeapWord* addr) {
+  // Below, the term "card num" means the result of shifting an address
+  // by the card shift -- address 0 corresponds to card number 0.  One
+  // must subtract the card num of the bottom of the heap to obtain a
+  // card table index.
+
+  intptr_t card_num = intptr_t(uintptr_t(addr) >> CardTableModRefBS::card_shift);
+  return card_num - heap_bottom_card_num();
+}
+
+// Counts the given memory region in the given task/worker
+// counting data structures.
+inline void ConcurrentMark::count_region(MemRegion mr, HeapRegion* hr,
+                                         size_t* marked_bytes_array,
+                                         BitMap* task_card_bm) {
+  G1CollectedHeap* g1h = _g1h;
+  HeapWord* start = mr.start();
+  HeapWord* last = mr.last();
+  size_t region_size_bytes = mr.byte_size();
+  size_t index = hr->hrs_index();
+
+  assert(!hr->continuesHumongous(), "should not be HC region");
+  assert(hr == g1h->heap_region_containing(start), "sanity");
+  assert(hr == g1h->heap_region_containing(mr.last()), "sanity");
+  assert(marked_bytes_array != NULL, "pre-condition");
+  assert(task_card_bm != NULL, "pre-condition");
+
+  // Add to the task local marked bytes for this region.
+  marked_bytes_array[index] += region_size_bytes;
+
+  BitMap::idx_t start_idx = card_bitmap_index_for(start);
+  BitMap::idx_t last_idx = card_bitmap_index_for(last);
+
+  // The card bitmap is task/worker specific => no need to use 'par' routines.
+  // Set bits in the inclusive bit range [start_idx, last_idx].
+  //
+  // For small ranges use a simple loop; otherwise use set_range
+  // The range are the cards that are spanned by the object/region
+  // so 8 cards will allow objects/regions up to 4K to be handled
+  // using the loop.
+  if ((last_idx - start_idx) <= 8) {
+    for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
+     task_card_bm->set_bit(i);
+    }
+  } else {
+    assert(last_idx < task_card_bm->size(), "sanity");
+    // Note: BitMap::set_range() is exclusive.
+    task_card_bm->set_range(start_idx, last_idx+1);
+  }
+}
+
+// Counts the given memory region in the task/worker counting
+// data structures for the given worker id.
+inline void ConcurrentMark::count_region(MemRegion mr,
+                                         HeapRegion* hr,
+                                         uint worker_id) {
+  size_t* marked_bytes_array = count_marked_bytes_array_for(worker_id);
+  BitMap* task_card_bm = count_card_bitmap_for(worker_id);
+  count_region(mr, hr, marked_bytes_array, task_card_bm);
+}
+
+// Counts the given memory region, which may be a single object, in the
+// task/worker counting data structures for the given worker id.
+inline void ConcurrentMark::count_region(MemRegion mr, uint worker_id) {
+  HeapWord* addr = mr.start();
+  HeapRegion* hr = _g1h->heap_region_containing_raw(addr);
+  count_region(mr, hr, worker_id);
+}
+
+// Counts the given object in the given task/worker counting data structures.
+inline void ConcurrentMark::count_object(oop obj,
+                                         HeapRegion* hr,
+                                         size_t* marked_bytes_array,
+                                         BitMap* task_card_bm) {
+  MemRegion mr((HeapWord*)obj, obj->size());
+  count_region(mr, hr, marked_bytes_array, task_card_bm);
+}
+
+// Counts the given object in the task/worker counting data
+// structures for the given worker id.
+inline void ConcurrentMark::count_object(oop obj,
+                                         HeapRegion* hr,
+                                         uint worker_id) {
+  size_t* marked_bytes_array = count_marked_bytes_array_for(worker_id);
+  BitMap* task_card_bm = count_card_bitmap_for(worker_id);
+  HeapWord* addr = (HeapWord*) obj;
+  count_object(obj, hr, marked_bytes_array, task_card_bm);
+}
+
+// Attempts to mark the given object and, if successful, counts
+// the object in the given task/worker counting structures.
+inline bool ConcurrentMark::par_mark_and_count(oop obj,
+                                               HeapRegion* hr,
+                                               size_t* marked_bytes_array,
+                                               BitMap* task_card_bm) {
+  HeapWord* addr = (HeapWord*)obj;
+  if (_nextMarkBitMap->parMark(addr)) {
+    // Update the task specific count data for the object.
+    count_object(obj, hr, marked_bytes_array, task_card_bm);
+    return true;
+  }
+  return false;
+}
+
+// Attempts to mark the given object and, if successful, counts
+// the object in the task/worker counting structures for the
+// given worker id.
+inline bool ConcurrentMark::par_mark_and_count(oop obj,
+                                               size_t word_size,
+                                               HeapRegion* hr,
+                                               uint worker_id) {
+  HeapWord* addr = (HeapWord*)obj;
+  if (_nextMarkBitMap->parMark(addr)) {
+    MemRegion mr(addr, word_size);
+    count_region(mr, hr, worker_id);
+    return true;
+  }
+  return false;
+}
+
+// Attempts to mark the given object and, if successful, counts
+// the object in the task/worker counting structures for the
+// given worker id.
+inline bool ConcurrentMark::par_mark_and_count(oop obj,
+                                               HeapRegion* hr,
+                                               uint worker_id) {
+  HeapWord* addr = (HeapWord*)obj;
+  if (_nextMarkBitMap->parMark(addr)) {
+    // Update the task specific count data for the object.
+    count_object(obj, hr, worker_id);
+    return true;
+  }
+  return false;
+}
+
+// As above - but we don't know the heap region containing the
+// object and so have to supply it.
+inline bool ConcurrentMark::par_mark_and_count(oop obj, uint worker_id) {
+  HeapWord* addr = (HeapWord*)obj;
+  HeapRegion* hr = _g1h->heap_region_containing_raw(addr);
+  return par_mark_and_count(obj, hr, worker_id);
+}
+
+// Similar to the above routine but we already know the size, in words, of
+// the object that we wish to mark/count
+inline bool ConcurrentMark::par_mark_and_count(oop obj,
+                                               size_t word_size,
+                                               uint worker_id) {
+  HeapWord* addr = (HeapWord*)obj;
+  if (_nextMarkBitMap->parMark(addr)) {
+    // Update the task specific count data for the object.
+    MemRegion mr(addr, word_size);
+    count_region(mr, worker_id);
+    return true;
+  }
+  return false;
+}
+
+// Unconditionally mark the given object, and unconditinally count
+// the object in the counting structures for worker id 0.
+// Should *not* be called from parallel code.
+inline bool ConcurrentMark::mark_and_count(oop obj, HeapRegion* hr) {
+  HeapWord* addr = (HeapWord*)obj;
+  _nextMarkBitMap->mark(addr);
+  // Update the task specific count data for the object.
+  count_object(obj, hr, 0 /* worker_id */);
+  return true;
+}
+
+// As above - but we don't have the heap region containing the
+// object, so we have to supply it.
+inline bool ConcurrentMark::mark_and_count(oop obj) {
+  HeapWord* addr = (HeapWord*)obj;
+  HeapRegion* hr = _g1h->heap_region_containing_raw(addr);
+  return mark_and_count(obj, hr);
+}
+
+inline bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
+  HeapWord* start_addr = MAX2(startWord(), mr.start());
+  HeapWord* end_addr = MIN2(endWord(), mr.end());
+
+  if (end_addr > start_addr) {
+    // Right-open interval [start-offset, end-offset).
+    BitMap::idx_t start_offset = heapWordToOffset(start_addr);
+    BitMap::idx_t end_offset = heapWordToOffset(end_addr);
+
+    start_offset = _bm.get_next_one_offset(start_offset, end_offset);
+    while (start_offset < end_offset) {
+      HeapWord* obj_addr = offsetToHeapWord(start_offset);
+      oop obj = (oop) obj_addr;
+      if (!cl->do_bit(start_offset)) {
+        return false;
+      }
+      HeapWord* next_addr = MIN2(obj_addr + obj->size(), end_addr);
+      BitMap::idx_t next_offset = heapWordToOffset(next_addr);
+      start_offset = _bm.get_next_one_offset(next_offset, end_offset);
+    }
+  }
+  return true;
+}
+
+inline bool CMBitMapRO::iterate(BitMapClosure* cl) {
+  MemRegion mr(startWord(), sizeInWords());
+  return iterate(cl, mr);
+}
+
 inline void CMTask::push(oop obj) {
   HeapWord* objAddr = (HeapWord*) obj;
   assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
@@ -84,7 +292,7 @@
 
   HeapWord* objAddr = (HeapWord*) obj;
   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
- if (_g1h->is_in_g1_reserved(objAddr)) {
+  if (_g1h->is_in_g1_reserved(objAddr)) {
     assert(obj != NULL, "null check is implicit");
     if (!_nextMarkBitMap->isMarked(objAddr)) {
       // Only get the containing region if the object is not marked on the
@@ -98,9 +306,9 @@
         }
 
         // we need to mark it first
-        if (_nextMarkBitMap->parMark(objAddr)) {
+        if (_cm->par_mark_and_count(obj, hr, _marked_bytes_array, _card_bm)) {
           // No OrderAccess:store_load() is needed. It is implicit in the
-          // CAS done in parMark(objAddr) above
+          // CAS done in CMBitMap::parMark() call in the routine above.
           HeapWord* global_finger = _cm->finger();
 
 #if _CHECK_BOTH_FINGERS_
@@ -160,25 +368,20 @@
   ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*) p);
 }
 
-inline void ConcurrentMark::markNext(oop p) {
-  assert(!_nextMarkBitMap->isMarked((HeapWord*) p), "sanity");
-  _nextMarkBitMap->mark((HeapWord*) p);
-}
-
-inline void ConcurrentMark::grayRoot(oop obj, size_t word_size) {
+inline void ConcurrentMark::grayRoot(oop obj, size_t word_size,
+                                     uint worker_id, HeapRegion* hr) {
+  assert(obj != NULL, "pre-condition");
   HeapWord* addr = (HeapWord*) obj;
+  if (hr == NULL) {
+    hr = _g1h->heap_region_containing_raw(addr);
+  } else {
+    assert(hr->is_in(addr), "pre-condition");
+  }
+  assert(hr != NULL, "sanity");
+  // Given that we're looking for a region that contains an object
+  // header it's impossible to get back a HC region.
+  assert(!hr->continuesHumongous(), "sanity");
 
-  // Currently we don't do anything with word_size but we will use it
-  // in the very near future in the liveness calculation piggy-backing
-  // changes.
-
-#ifdef ASSERT
-  HeapRegion* hr = _g1h->heap_region_containing(addr);
-  assert(hr != NULL, "sanity");
-  assert(!hr->is_survivor(), "should not allocate survivors during IM");
-  assert(addr < hr->next_top_at_mark_start(),
-         err_msg("addr: "PTR_FORMAT" hr: "HR_FORMAT" NTAMS: "PTR_FORMAT,
-                 addr, HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start()));
   // We cannot assert that word_size == obj->size() given that obj
   // might not be in a consistent state (another thread might be in
   // the process of copying it). So the best thing we can do is to
@@ -188,10 +391,11 @@
          err_msg("size: "SIZE_FORMAT" capacity: "SIZE_FORMAT" "HR_FORMAT,
                  word_size * HeapWordSize, hr->capacity(),
                  HR_FORMAT_PARAMS(hr)));
-#endif // ASSERT
 
-  if (!_nextMarkBitMap->isMarked(addr)) {
-    _nextMarkBitMap->parMark(addr);
+  if (addr < hr->next_top_at_mark_start()) {
+    if (!_nextMarkBitMap->isMarked(addr)) {
+      par_mark_and_count(obj, word_size, hr, worker_id);
+    }
   }
 }
 
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp	Fri Feb 03 14:04:59 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -44,9 +44,7 @@
   _started(false),
   _in_progress(false),
   _vtime_accum(0.0),
-  _vtime_mark_accum(0.0),
-  _vtime_count_accum(0.0)
-{
+  _vtime_mark_accum(0.0) {
   create_and_start();
 }
 
@@ -94,9 +92,36 @@
       ResourceMark rm;
       HandleMark   hm;
       double cycle_start = os::elapsedVTime();
-      double mark_start_sec = os::elapsedTime();
       char verbose_str[128];
 
+      // We have to ensure that we finish scanning the root regions
+      // before the next GC takes place. To ensure this we have to
+      // make sure that we do not join the STS until the root regions
+      // have been scanned. If we did then it's possible that a
+      // subsequent GC could block us from joining the STS and proceed
+      // without the root regions have been scanned which would be a
+      // correctness issue.
+
+      double scan_start = os::elapsedTime();
+      if (!cm()->has_aborted()) {
+        if (PrintGC) {
+          gclog_or_tty->date_stamp(PrintGCDateStamps);
+          gclog_or_tty->stamp(PrintGCTimeStamps);
+          gclog_or_tty->print_cr("[GC concurrent-root-region-scan-start]");
+        }
+
+        _cm->scanRootRegions();
+
+        double scan_end = os::elapsedTime();
+        if (PrintGC) {
+          gclog_or_tty->date_stamp(PrintGCDateStamps);
+          gclog_or_tty->stamp(PrintGCTimeStamps);
+          gclog_or_tty->print_cr("[GC concurrent-root-region-scan-end, %1.7lf]",
+                                 scan_end - scan_start);
+        }
+      }
+
+      double mark_start_sec = os::elapsedTime();
       if (PrintGC) {
         gclog_or_tty->date_stamp(PrintGCDateStamps);
         gclog_or_tty->stamp(PrintGCTimeStamps);
@@ -148,36 +173,12 @@
         }
       } while (cm()->restart_for_overflow());
 
-      double counting_start_time = os::elapsedVTime();
-      if (!cm()->has_aborted()) {
-        double count_start_sec = os::elapsedTime();
-        if (PrintGC) {
-          gclog_or_tty->date_stamp(PrintGCDateStamps);
-          gclog_or_tty->stamp(PrintGCTimeStamps);
-          gclog_or_tty->print_cr("[GC concurrent-count-start]");
-        }
-
-        _sts.join();
-        _cm->calcDesiredRegions();
-        _sts.leave();
-
-        if (!cm()->has_aborted()) {
-          double count_end_sec = os::elapsedTime();
-          if (PrintGC) {
-            gclog_or_tty->date_stamp(PrintGCDateStamps);
-            gclog_or_tty->stamp(PrintGCTimeStamps);
-            gclog_or_tty->print_cr("[GC concurrent-count-end, %1.7lf]",
-                                   count_end_sec - count_start_sec);
-          }
-        }
-      }
-
       double end_time = os::elapsedVTime();
-      _vtime_count_accum += (end_time - counting_start_time);
       // Update the total virtual time before doing this, since it will try
       // to measure it to get the vtime for this marking.  We purposely
       // neglect the presumably-short "completeCleanup" phase here.
       _vtime_accum = (end_time - _vtime_start);
+
       if (!cm()->has_aborted()) {
         if (g1_policy->adaptive_young_list_length()) {
           double now = os::elapsedTime();
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp	Fri Feb 03 14:04:59 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -40,7 +40,6 @@
   double _vtime_accum;  // Accumulated virtual time.
 
   double _vtime_mark_accum;
-  double _vtime_count_accum;
 
  public:
   virtual void run();
@@ -69,8 +68,6 @@
   double vtime_accum();
   // Marking virtual time so far
   double vtime_mark_accum();
-  // Counting virtual time so far.
-  double vtime_count_accum() { return _vtime_count_accum; }
 
   ConcurrentMark* cm()     { return _cm; }
 
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Fri Feb 03 14:04:59 2012 -0500
@@ -174,13 +174,10 @@
   }
 };
 
-YoungList::YoungList(G1CollectedHeap* g1h)
-  : _g1h(g1h), _head(NULL),
-    _length(0),
-    _last_sampled_rs_lengths(0),
-    _survivor_head(NULL), _survivor_tail(NULL), _survivor_length(0)
-{
-  guarantee( check_list_empty(false), "just making sure..." );
+YoungList::YoungList(G1CollectedHeap* g1h) :
+    _g1h(g1h), _head(NULL), _length(0), _last_sampled_rs_lengths(0),
+    _survivor_head(NULL), _survivor_tail(NULL), _survivor_length(0) {
+  guarantee(check_list_empty(false), "just making sure...");
 }
 
 void YoungList::push_region(HeapRegion *hr) {
@@ -1029,6 +1026,15 @@
   assert(isHumongous(word_size), "attempt_allocation_humongous() "
          "should only be called for humongous allocations");
 
+  // Humongous objects can exhaust the heap quickly, so we should check if we
+  // need to start a marking cycle at each humongous object allocation. We do
+  // the check before we do the actual allocation. The reason for doing it
+  // before the allocation is that we avoid having to keep track of the newly
+  // allocated memory while we do a GC.
+  if (g1_policy()->need_to_start_conc_mark("concurrent humongous allocation", word_size)) {
+    collect(GCCause::_g1_humongous_allocation);
+  }
+
   // We will loop until a) we manage to successfully perform the
   // allocation or b) we successfully schedule a collection which
   // fails to perform the allocation. b) is the only case when we'll
@@ -1111,7 +1117,11 @@
     return _mutator_alloc_region.attempt_allocation_locked(word_size,
                                                       false /* bot_updates */);
   } else {
-    return humongous_obj_allocate(word_size);
+    HeapWord* result = humongous_obj_allocate(word_size);
+    if (result != NULL && g1_policy()->need_to_start_conc_mark("STW humongous allocation")) {
+      g1_policy()->set_initiate_conc_mark_if_possible();
+    }
+    return result;
   }
 
   ShouldNotReachHere();
@@ -1257,7 +1267,18 @@
     double start = os::elapsedTime();
     g1_policy()->record_full_collection_start();
 
+    // Note: When we have a more flexible GC logging framework that
+    // allows us to add optional attributes to a GC log record we
+    // could consider timing and reporting how long we wait in the
+    // following two methods.
     wait_while_free_regions_coming();
+    // If we start the compaction before the CM threads finish
+    // scanning the root regions we might trip them over as we'll
+    // be moving objects / updating references. So let's wait until
+    // they are done. By telling them to abort, they should complete
+    // early.
+    _cm->root_regions()->abort();
+    _cm->root_regions()->wait_until_scan_finished();
     append_secondary_free_list_if_not_empty_with_lock();
 
     gc_prologue(true);
@@ -1286,7 +1307,8 @@
     ref_processor_cm()->verify_no_references_recorded();
 
     // Abandon current iterations of concurrent marking and concurrent
-    // refinement, if any are in progress.
+    // refinement, if any are in progress. We have to do this before
+    // wait_until_scan_finished() below.
     concurrent_mark()->abort();
 
     // Make sure we'll choose a new allocation region afterwards.
@@ -2295,7 +2317,8 @@
 bool G1CollectedHeap::should_do_concurrent_full_gc(GCCause::Cause cause) {
   return
     ((cause == GCCause::_gc_locker           && GCLockerInvokesConcurrent) ||
-     (cause == GCCause::_java_lang_system_gc && ExplicitGCInvokesConcurrent));
+     (cause == GCCause::_java_lang_system_gc && ExplicitGCInvokesConcurrent) ||
+      cause == GCCause::_g1_humongous_allocation);
 }
 
 #ifndef PRODUCT
@@ -3545,19 +3568,25 @@
   verify_region_sets_optional();
   verify_dirty_young_regions();
 
+  // This call will decide whether this pause is an initial-mark
+  // pause. If it is, during_initial_mark_pause() will return true
+  // for the duration of this pause.
+  g1_policy()->decide_on_conc_mark_initiation();
+
+  // We do not allow initial-mark to be piggy-backed on a mixed GC.
+  assert(!g1_policy()->during_initial_mark_pause() ||
+          g1_policy()->gcs_are_young(), "sanity");
+
+  // We also do not allow mixed GCs during marking.
+  assert(!mark_in_progress() || g1_policy()->gcs_are_young(), "sanity");
+
+  // Record whether this pause is an initial mark. When the current
+  // thread has completed its logging output and it's safe to signal
+  // the CM thread, the flag's value in the policy has been reset.
+  bool should_start_conc_mark = g1_policy()->during_initial_mark_pause();
+
+  // Inner scope for scope based logging, timers, and stats collection
   {
-    // This call will decide whether this pause is an initial-mark
-    // pause. If it is, during_initial_mark_pause() will return true
-    // for the duration of this pause.
-    g1_policy()->decide_on_conc_mark_initiation();
-
-    // We do not allow initial-mark to be piggy-backed on a mixed GC.
-    assert(!g1_policy()->during_initial_mark_pause() ||
-            g1_policy()->gcs_are_young(), "sanity");
-
-    // We also do not allow mixed GCs during marking.
-    assert(!mark_in_progress() || g1_policy()->gcs_are_young(), "sanity");
-
     char verbose_str[128];
     sprintf(verbose_str, "GC pause ");
     if (g1_policy()->gcs_are_young()) {
@@ -3613,7 +3642,6 @@
         Universe::verify(/* allow dirty */ false,
                          /* silent      */ false,
                          /* option      */ VerifyOption_G1UsePrevMarking);
-
       }
 
       COMPILER2_PRESENT(DerivedPointerTable::clear());
@@ -3656,6 +3684,18 @@
         g1_policy()->record_collection_pause_start(start_time_sec,
                                                    start_used_bytes);
 
+        double scan_wait_start = os::elapsedTime();
+        // We have to wait until the CM threads finish scanning the
+        // root regions as it's the only way to ensure that all the
+        // objects on them have been correctly scanned before we start
+        // moving them during the GC.
+        bool waited = _cm->root_regions()->wait_until_scan_finished();
+        if (waited) {
+          double scan_wait_end = os::elapsedTime();
+          double wait_time_ms = (scan_wait_end - scan_wait_start) * 1000.0;
+          g1_policy()->record_root_region_scan_wait_time(wait_time_ms);
+        }
+
 #if YOUNG_LIST_VERBOSE
         gclog_or_tty->print_cr("\nAfter recording pause start.\nYoung_list:");
         _young_list->print();
@@ -3765,16 +3805,14 @@
         }
 
         if (g1_policy()->during_initial_mark_pause()) {
+          // We have to do this before we notify the CM threads that
+          // they can start working to make sure that all the
+          // appropriate initialization is done on the CM object.
           concurrent_mark()->checkpointRootsInitialPost();
           set_marking_started();
-          // CAUTION: after the doConcurrentMark() call below,
-          // the concurrent marking thread(s) could be running
-          // concurrently with us. Make sure that anything after
-          // this point does not assume that we are the only GC thread
-          // running. Note: of course, the actual marking work will
-          // not start until the safepoint itself is released in
-          // ConcurrentGCThread::safepoint_desynchronize().
-          doConcurrentMark();
+          // Note that we don't actually trigger the CM thread at
+          // this point. We do that later when we're sure that
+          // the current thread has completed its logging output.
         }
 
         allocate_dummy_regions();
@@ -3884,6 +3922,15 @@
     }
   }
 
+  // The closing of the inner scope, immediately above, will complete
+  // the PrintGC logging output. The record_collection_pause_end() call
+  // above will complete the logging output of PrintGCDetails.
+  //
+  // It is not yet to safe, however, to tell the concurrent mark to
+  // start as we have some optional output below. We don't want the
+  // output from the concurrent mark thread interfering with this
+  // logging output either.
+
   _hrs.verify_optional();
   verify_region_sets_optional();
 
@@ -3901,6 +3948,21 @@
     g1_rem_set()->print_summary_info();
   }
 
+  // It should now be safe to tell the concurrent mark thread to start
+  // without its logging output interfering with the logging output
+  // that came from the pause.
+
+  if (should_start_conc_mark) {
+    // CAUTION: after the doConcurrentMark() call below,
+    // the concurrent marking thread(s) could be running
+    // concurrently with us. Make sure that anything after
+    // this point does not assume that we are the only GC thread
+    // running. Note: of course, the actual marking work will
+    // not start until the safepoint itself is released in
+    // ConcurrentGCThread::safepoint_desynchronize().
+    doConcurrentMark();
+  }
+
   return true;
 }
 
@@ -4162,7 +4224,7 @@
 G1ParGCAllocBuffer::G1ParGCAllocBuffer(size_t gclab_word_size) :
   ParGCAllocBuffer(gclab_word_size), _retired(false) { }
 
-G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, int queue_num)
+G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, uint queue_num)
   : _g1h(g1h),
     _refs(g1h->task_queue(queue_num)),
     _dcq(&g1h->dirty_card_queue_set()),
@@ -4283,6 +4345,7 @@
                                      G1ParScanThreadState* par_scan_state) :
   _g1(g1), _g1_rem(_g1->g1_rem_set()), _cm(_g1->concurrent_mark()),
   _par_scan_state(par_scan_state),
+  _worker_id(par_scan_state->queue_num()),
   _during_initial_mark(_g1->g1_policy()->during_initial_mark_pause()),
   _mark_in_progress(_g1->mark_in_progress()) { }
 
@@ -4294,7 +4357,7 @@
 #endif // ASSERT
 
   // We know that the object is not moving so it's safe to read its size.
-  _cm->grayRoot(obj, (size_t) obj->size());
+  _cm->grayRoot(obj, (size_t) obj->size(), _worker_id);
 }
 
 void G1ParCopyHelper::mark_forwarded_object(oop from_obj, oop to_obj) {
@@ -4316,7 +4379,7 @@
   // worker so we cannot trust that its to-space image is
   // well-formed. So we have to read its size from its from-space
   // image which we know should not be changing.
-  _cm->grayRoot(to_obj, (size_t) from_obj->size());
+  _cm->grayRoot(to_obj, (size_t) from_obj->size(), _worker_id);
 }
 
 oop G1ParCopyHelper::copy_to_survivor_space(oop old) {
@@ -4406,6 +4469,8 @@
   assert(barrier != G1BarrierRS || obj != NULL,
          "Precondition: G1BarrierRS implies obj is non-NULL");
 
+  assert(_worker_id == _par_scan_state->queue_num(), "sanity");
+
   // here the null check is implicit in the cset_fast_test() test
   if (_g1->in_cset_fast_test(obj)) {
     oop forwardee;
@@ -4424,7 +4489,7 @@
 
     // When scanning the RS, we only care about objs in CS.
     if (barrier == G1BarrierRS) {
-      _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num());
+      _par_scan_state->update_rs(_from, p, _worker_id);
     }
   } else {
     // The object is not in collection set. If we're a root scanning
@@ -4436,7 +4501,7 @@
   }
 
   if (barrier == G1BarrierEvac && obj != NULL) {
-    _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num());
+    _par_scan_state->update_rs(_from, p, _worker_id);
   }
 
   if (do_gen_barrier && obj != NULL) {
@@ -5666,16 +5731,6 @@
 
       // And the region is empty.
       assert(!used_mr.is_empty(), "Should not have empty regions in a CS.");
-
-      // If marking is in progress then clear any objects marked in
-      // the current region. Note mark_in_progress() returns false,
-      // even during an initial mark pause, until the set_marking_started()
-      // call which takes place later in the pause.
-      if (mark_in_progress()) {
-        assert(!g1_policy()->during_initial_mark_pause(), "sanity");
-        _cm->nextMarkBitMap()->clearRange(used_mr);
-      }
-
       free_region(cur, &pre_used, &local_free_list, false /* par */);
     } else {
       cur->uninstall_surv_rate_group();
@@ -5742,8 +5797,9 @@
 }
 
 void G1CollectedHeap::reset_free_regions_coming() {
+  assert(free_regions_coming(), "pre-condition");
+
   {
-    assert(free_regions_coming(), "pre-condition");
     MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
     _free_regions_coming = false;
     SecondaryFreeList_lock->notify_all();
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Fri Feb 03 14:04:59 2012 -0500
@@ -355,6 +355,7 @@
   // explicitly started if:
   // (a) cause == _gc_locker and +GCLockerInvokesConcurrent, or
   // (b) cause == _java_lang_system_gc and +ExplicitGCInvokesConcurrent.
+  // (c) cause == _g1_humongous_allocation
   bool should_do_concurrent_full_gc(GCCause::Cause cause);
 
   // Keeps track of how many "full collections" (i.e., Full GCs or
@@ -1172,6 +1173,10 @@
     _old_set.remove(hr);
   }
 
+  size_t non_young_capacity_bytes() {
+    return _old_set.total_capacity_bytes() + _humongous_set.total_capacity_bytes();
+  }
+
   void set_free_regions_coming();
   void reset_free_regions_coming();
   bool free_regions_coming() { return _free_regions_coming; }
@@ -1904,7 +1909,7 @@
   G1ParScanPartialArrayClosure* _partial_scan_cl;
 
   int _hash_seed;
-  int _queue_num;
+  uint _queue_num;
 
   size_t _term_attempts;
 
@@ -1948,7 +1953,7 @@
   }
 
 public:
-  G1ParScanThreadState(G1CollectedHeap* g1h, int queue_num);
+  G1ParScanThreadState(G1CollectedHeap* g1h, uint queue_num);
 
   ~G1ParScanThreadState() {
     FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_base);
@@ -2040,7 +2045,7 @@
   }
 
   int* hash_seed() { return &_hash_seed; }
-  int  queue_num() { return _queue_num; }
+  uint queue_num() { return _queue_num; }
 
   size_t term_attempts() const  { return _term_attempts; }
   void note_term_attempt() { _term_attempts++; }
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Fri Feb 03 14:04:59 2012 -0500
@@ -141,6 +141,7 @@
 
   _cur_clear_ct_time_ms(0.0),
   _mark_closure_time_ms(0.0),
+  _root_region_scan_wait_time_ms(0.0),
 
   _cur_ref_proc_time_ms(0.0),
   _cur_ref_enq_time_ms(0.0),
@@ -213,8 +214,6 @@
   _survivor_bytes_before_gc(0),
   _capacity_before_gc(0),
 
-  _prev_collection_pause_used_at_end_bytes(0),
-
   _eden_cset_region_length(0),
   _survivor_cset_region_length(0),
   _old_cset_region_length(0),
@@ -905,19 +904,10 @@
     gclog_or_tty->print(" (%s)", gcs_are_young() ? "young" : "mixed");
   }
 
-  if (!during_initial_mark_pause()) {
-    // We only need to do this here as the policy will only be applied
-    // to the GC we're about to start. so, no point is calculating this
-    // every time we calculate / recalculate the target young length.
-    update_survivors_policy();
-  } else {
-    // The marking phase has a "we only copy implicitly live
-    // objects during marking" invariant. The easiest way to ensure it
-    // holds is not to allocate any survivor regions and tenure all
-    // objects. In the future we might change this and handle survivor
-    // regions specially during marking.
-    tenure_all_objects();
-  }
+  // We only need to do this here as the policy will only be applied
+  // to the GC we're about to start. so, no point is calculating this
+  // every time we calculate / recalculate the target young length.
+  update_survivors_policy();
 
   assert(_g1->used() == _g1->recalculate_used(),
          err_msg("sanity, used: "SIZE_FORMAT" recalculate_used: "SIZE_FORMAT,
@@ -969,6 +959,9 @@
   // This is initialized to zero here and is set during
   // the evacuation pause if marking is in progress.
   _cur_satb_drain_time_ms = 0.0;
+  // This is initialized to zero here and is set during the evacuation
+  // pause if we actually waited for the root region scanning to finish.
+  _root_region_scan_wait_time_ms = 0.0;
 
   _last_gc_was_young = false;
 
@@ -1140,6 +1133,50 @@
   return ret;
 }
 
+bool G1CollectorPolicy::need_to_start_conc_mark(const char* source, size_t alloc_word_size) {
+  if (_g1->concurrent_mark()->cmThread()->during_cycle()) {
+    return false;
+  }
+
+  size_t marking_initiating_used_threshold =
+    (_g1->capacity() / 100) * InitiatingHeapOccupancyPercent;
+  size_t cur_used_bytes = _g1->non_young_capacity_bytes();
+  size_t alloc_byte_size = alloc_word_size * HeapWordSize;
+
+  if ((cur_used_bytes + alloc_byte_size) > marking_initiating_used_threshold) {
+    if (gcs_are_young()) {
+      ergo_verbose5(ErgoConcCycles,
+        "request concurrent cycle initiation",
+        ergo_format_reason("occupancy higher than threshold")
+        ergo_format_byte("occupancy")
+        ergo_format_byte("allocation request")
+        ergo_format_byte_perc("threshold")
+        ergo_format_str("source"),
+        cur_used_bytes,
+        alloc_byte_size,
+        marking_initiating_used_threshold,
+        (double) InitiatingHeapOccupancyPercent,
+        source);
+      return true;
+    } else {
+      ergo_verbose5(ErgoConcCycles,
+        "do not request concurrent cycle initiation",
+        ergo_format_reason("still doing mixed collections")
+        ergo_format_byte("occupancy")
+        ergo_format_byte("allocation request")
+        ergo_format_byte_perc("threshold")
+        ergo_format_str("source"),
+        cur_used_bytes,
+        alloc_byte_size,
+        marking_initiating_used_threshold,
+        (double) InitiatingHeapOccupancyPercent,
+        source);
+    }
+  }
+
+  return false;
+}
+
 // Anything below that is considered to be zero
 #define MIN_TIMER_GRANULARITY 0.0000001
 
@@ -1166,44 +1203,16 @@
 #endif // PRODUCT
 
   last_pause_included_initial_mark = during_initial_mark_pause();
-  if (last_pause_included_initial_mark)
+  if (last_pause_included_initial_mark) {
     record_concurrent_mark_init_end(0.0);
-
-  size_t marking_initiating_used_threshold =
-    (_g1->capacity() / 100) * InitiatingHeapOccupancyPercent;
-
-  if (!_g1->mark_in_progress() && !_last_young_gc) {
-    assert(!last_pause_included_initial_mark, "invariant");
-    if (cur_used_bytes > marking_initiating_used_threshold) {
-      if (cur_used_bytes > _prev_collection_pause_used_at_end_bytes) {
-        assert(!during_initial_mark_pause(), "we should not see this here");
-
-        ergo_verbose3(ErgoConcCycles,
-                      "request concurrent cycle initiation",
-                      ergo_format_reason("occupancy higher than threshold")
-                      ergo_format_byte("occupancy")
-                      ergo_format_byte_perc("threshold"),
-                      cur_used_bytes,
-                      marking_initiating_used_threshold,
-                      (double) InitiatingHeapOccupancyPercent);
-
-        // Note: this might have already been set, if during the last
-        // pause we decided to start a cycle but at the beginning of
-        // this pause we decided to postpone it. That's OK.
-        set_initiate_conc_mark_if_possible();
-      } else {
-        ergo_verbose2(ErgoConcCycles,
-                  "do not request concurrent cycle initiation",
-                  ergo_format_reason("occupancy lower than previous occupancy")
-                  ergo_format_byte("occupancy")
-                  ergo_format_byte("previous occupancy"),
-                  cur_used_bytes,
-                  _prev_collection_pause_used_at_end_bytes);
-      }
-    }
   }
 
-  _prev_collection_pause_used_at_end_bytes = cur_used_bytes;
+  if (!_last_young_gc && need_to_start_conc_mark("end of GC")) {
+    // Note: this might have already been set, if during the last
+    // pause we decided to start a cycle but at the beginning of
+    // this pause we decided to postpone it. That's OK.
+    set_initiate_conc_mark_if_possible();
+  }
 
   _mmu_tracker->add_pause(end_time_sec - elapsed_ms/1000.0,
                           end_time_sec, false);
@@ -1257,6 +1266,10 @@
   // is in progress.
   other_time_ms -= _cur_satb_drain_time_ms;
 
+  // Subtract the root region scanning wait time. It's initialized to
+  // zero at the start of the pause.
+  other_time_ms -= _root_region_scan_wait_time_ms;
+
   if (parallel) {
     other_time_ms -= _cur_collection_par_time_ms;
   } else {
@@ -1289,6 +1302,8 @@
     // each other. Therefore we unconditionally record the SATB drain
     // time - even if it's zero.
     body_summary->record_satb_drain_time_ms(_cur_satb_drain_time_ms);
+    body_summary->record_root_region_scan_wait_time_ms(
+                                               _root_region_scan_wait_time_ms);
 
     body_summary->record_ext_root_scan_time_ms(ext_root_scan_time);
     body_summary->record_satb_filtering_time_ms(satb_filtering_time);
@@ -1385,6 +1400,9 @@
                            (last_pause_included_initial_mark) ? " (initial-mark)" : "",
                            elapsed_ms / 1000.0);
 
+    if (_root_region_scan_wait_time_ms > 0.0) {
+      print_stats(1, "Root Region Scan Waiting", _root_region_scan_wait_time_ms);
+    }
     if (parallel) {
       print_stats(1, "Parallel Time", _cur_collection_par_time_ms);
       print_par_stats(2, "GC Worker Start", _par_last_gc_worker_start_times_ms);
@@ -1988,6 +2006,7 @@
   if (summary->get_total_seq()->num() > 0) {
     print_summary_sd(0, "Evacuation Pauses", summary->get_total_seq());
     if (body_summary != NULL) {
+      print_summary(1, "Root Region Scan Wait", body_summary->get_root_region_scan_wait_seq());
       if (parallel) {
         print_summary(1, "Parallel Time", body_summary->get_parallel_seq());
         print_summary(2, "Ext Root Scanning", body_summary->get_ext_root_scan_seq());
@@ -2029,15 +2048,17 @@
           // parallel
           NumberSeq* other_parts[] = {
             body_summary->get_satb_drain_seq(),
+            body_summary->get_root_region_scan_wait_seq(),
             body_summary->get_parallel_seq(),
             body_summary->get_clear_ct_seq()
           };
           calc_other_times_ms = NumberSeq(summary->get_total_seq(),
-                                                3, other_parts);
+                                          4, other_parts);
         } else {
           // serial
           NumberSeq* other_parts[] = {
             body_summary->get_satb_drain_seq(),
+            body_summary->get_root_region_scan_wait_seq(),
             body_summary->get_update_rs_seq(),
             body_summary->get_ext_root_scan_seq(),
             body_summary->get_satb_filtering_seq(),
@@ -2045,7 +2066,7 @@
             body_summary->get_obj_copy_seq()
           };
           calc_other_times_ms = NumberSeq(summary->get_total_seq(),
-                                                6, other_parts);
+                                          7, other_parts);
         }
         check_other_times(1,  summary->get_other_seq(), &calc_other_times_ms);
       }
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Fri Feb 03 14:04:59 2012 -0500
@@ -65,6 +65,7 @@
 
 class MainBodySummary: public CHeapObj {
   define_num_seq(satb_drain) // optional
+  define_num_seq(root_region_scan_wait)
   define_num_seq(parallel) // parallel only
     define_num_seq(ext_root_scan)
     define_num_seq(satb_filtering)
@@ -177,7 +178,6 @@
   double _cur_collection_start_sec;
   size_t _cur_collection_pause_used_at_start_bytes;
   size_t _cur_collection_pause_used_regions_at_start;
-  size_t _prev_collection_pause_used_at_end_bytes;
   double _cur_collection_par_time_ms;
   double _cur_satb_drain_time_ms;
   double _cur_clear_ct_time_ms;
@@ -716,6 +716,7 @@
   double _mark_remark_start_sec;
   double _mark_cleanup_start_sec;
   double _mark_closure_time_ms;
+  double _root_region_scan_wait_time_ms;
 
   // Update the young list target length either by setting it to the
   // desired fixed value or by calculating it using G1's pause
@@ -800,6 +801,8 @@
 
   GenRemSet::Name  rem_set_name()     { return GenRemSet::CardTable; }
 
+  bool need_to_start_conc_mark(const char* source, size_t alloc_word_size = 0);
+
   // Update the heuristic info to record a collection pause of the given
   // start time, where the given number of bytes were used at the start.
   // This may involve changing the desired size of a collection set.
@@ -816,6 +819,10 @@
     _mark_closure_time_ms = mark_closure_time_ms;
   }
 
+  void record_root_region_scan_wait_time(double time_ms) {
+    _root_region_scan_wait_time_ms = time_ms;
+  }
+
   void record_concurrent_mark_remark_start();
   void record_concurrent_mark_remark_end();
 
@@ -1146,11 +1153,6 @@
     _survivor_surv_rate_group->stop_adding_regions();
   }
 
-  void tenure_all_objects() {
-    _max_survivor_regions = 0;
-    _tenuring_threshold = 0;
-  }
-
   void record_survivor_regions(size_t      regions,
                                HeapRegion* head,
                                HeapRegion* tail) {
--- a/hotspot/src/share/vm/gc_implementation/g1/g1EvacFailure.hpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1EvacFailure.hpp	Fri Feb 03 14:04:59 2012 -0500
@@ -70,16 +70,20 @@
   OopsInHeapRegionClosure *_update_rset_cl;
   bool _during_initial_mark;
   bool _during_conc_mark;
+  uint _worker_id;
+
 public:
   RemoveSelfForwardPtrObjClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
                                  HeapRegion* hr,
                                  OopsInHeapRegionClosure* update_rset_cl,
                                  bool during_initial_mark,
-                                 bool during_conc_mark) :
+                                 bool during_conc_mark,
+                                 uint worker_id) :
     _g1(g1), _cm(cm), _hr(hr), _marked_bytes(0),
     _update_rset_cl(update_rset_cl),
     _during_initial_mark(during_initial_mark),
-    _during_conc_mark(during_conc_mark) { }
+    _during_conc_mark(during_conc_mark),
+    _worker_id(worker_id) { }
 
   size_t marked_bytes() { return _marked_bytes; }
 
@@ -123,7 +127,7 @@
         // explicitly and all objects in the CSet are considered
         // (implicitly) live. So, we won't mark them explicitly and
         // we'll leave them over NTAMS.
-        _cm->markNext(obj);
+        _cm->grayRoot(obj, obj_size, _worker_id, _hr);
       }
       _marked_bytes += (obj_size * HeapWordSize);
       obj->set_mark(markOopDesc::prototype());
@@ -155,12 +159,14 @@
   G1CollectedHeap* _g1h;
   ConcurrentMark* _cm;
   OopsInHeapRegionClosure *_update_rset_cl;
+  uint _worker_id;
 
 public:
   RemoveSelfForwardPtrHRClosure(G1CollectedHeap* g1h,
-                                OopsInHeapRegionClosure* update_rset_cl) :
+                                OopsInHeapRegionClosure* update_rset_cl,
+                                uint worker_id) :
     _g1h(g1h), _update_rset_cl(update_rset_cl),
-    _cm(_g1h->concurrent_mark()) { }
+    _worker_id(worker_id), _cm(_g1h->concurrent_mark()) { }
 
   bool doHeapRegion(HeapRegion *hr) {
     bool during_initial_mark = _g1h->g1_policy()->during_initial_mark_pause();
@@ -173,7 +179,8 @@
       if (hr->evacuation_failed()) {
         RemoveSelfForwardPtrObjClosure rspc(_g1h, _cm, hr, _update_rset_cl,
                                             during_initial_mark,
-                                            during_conc_mark);
+                                            during_conc_mark,
+                                            _worker_id);
 
         MemRegion mr(hr->bottom(), hr->end());
         // We'll recreate the prev marking info so we'll first clear
@@ -226,7 +233,7 @@
       update_rset_cl = &immediate_update;
     }
 
-    RemoveSelfForwardPtrHRClosure rsfp_cl(_g1h, update_rset_cl);
+    RemoveSelfForwardPtrHRClosure rsfp_cl(_g1h, update_rset_cl, worker_id);
 
     HeapRegion* hr = _g1h->start_cset_region_for_worker(worker_id);
     _g1h->collection_set_iterate_from(hr, &rsfp_cl);
--- a/hotspot/src/share/vm/gc_implementation/g1/g1MonitoringSupport.hpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1MonitoringSupport.hpp	Fri Feb 03 14:04:59 2012 -0500
@@ -89,16 +89,15 @@
 //
 // * Min Capacity
 //
-//    We set this to 0 for all spaces. We could consider setting the old
-//    min capacity to the min capacity of the heap (see 7078465).
+//    We set this to 0 for all spaces.
 //
 // * Max Capacity
 //
 //    For jstat, we set the max capacity of all spaces to heap_capacity,
-//    given that we don't always have a reasonably upper bound on how big
-//    each space can grow. For the memory pools, we actually make the max
-//    capacity undefined. We could consider setting the old max capacity
-//    to the max capacity of the heap (see 7078465).
+//    given that we don't always have a reasonable upper bound on how big
+//    each space can grow. For the memory pools, we make the max
+//    capacity undefined with the exception of the old memory pool for
+//    which we make the max capacity same as the max heap capacity.
 //
 // If we had more accurate occupancy / capacity information per
 // region set the above calculations would be greatly simplified and
--- a/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Fri Feb 03 14:04:59 2012 -0500
@@ -51,6 +51,7 @@
   G1RemSet* _g1_rem;
   ConcurrentMark* _cm;
   G1ParScanThreadState* _par_scan_state;
+  uint _worker_id;
   bool _during_initial_mark;
   bool _mark_in_progress;
 public:
@@ -219,6 +220,7 @@
 
 // Closure for iterating over object fields during concurrent marking
 class G1CMOopClosure : public OopClosure {
+private:
   G1CollectedHeap*   _g1h;
   ConcurrentMark*    _cm;
   CMTask*            _task;
@@ -229,4 +231,92 @@
   virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
 };
 
+// Closure to scan the root regions during concurrent marking
+class G1RootRegionScanClosure : public OopClosure {
+private:
+  G1CollectedHeap* _g1h;
+  ConcurrentMark*  _cm;
+  uint _worker_id;
+public:
+  G1RootRegionScanClosure(G1CollectedHeap* g1h, ConcurrentMark* cm,
+                          uint worker_id) :
+    _g1h(g1h), _cm(cm), _worker_id(worker_id) { }
+  template <class T> void do_oop_nv(T* p);
+  virtual void do_oop(      oop* p) { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
+};
+
+// Closure that applies the given two closures in sequence.
+// Used by the RSet refinement code (when updating RSets
+// during an evacuation pause) to record cards containing
+// pointers into the collection set.
+
+class G1Mux2Closure : public OopClosure {
+  OopClosure* _c1;
+  OopClosure* _c2;
+public:
+  G1Mux2Closure(OopClosure *c1, OopClosure *c2);
+  template <class T> void do_oop_nv(T* p);
+  virtual void do_oop(oop* p)        { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p)  { do_oop_nv(p); }
+};
+
+// A closure that returns true if it is actually applied
+// to a reference
+
+class G1TriggerClosure : public OopClosure {
+  bool _triggered;
+public:
+  G1TriggerClosure();
+  bool triggered() const { return _triggered; }
+  template <class T> void do_oop_nv(T* p);
+  virtual void do_oop(oop* p)        { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p)  { do_oop_nv(p); }
+};
+
+// A closure which uses a triggering closure to determine
+// whether to apply an oop closure.
+
+class G1InvokeIfNotTriggeredClosure: public OopClosure {
+  G1TriggerClosure* _trigger_cl;
+  OopClosure* _oop_cl;
+public:
+  G1InvokeIfNotTriggeredClosure(G1TriggerClosure* t, OopClosure* oc);
+  template <class T> void do_oop_nv(T* p);
+  virtual void do_oop(oop* p)        { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p)  { do_oop_nv(p); }
+};
+
+class G1UpdateRSOrPushRefOopClosure: public OopClosure {
+  G1CollectedHeap* _g1;
+  G1RemSet* _g1_rem_set;
+  HeapRegion* _from;
+  OopsInHeapRegionClosure* _push_ref_cl;
+  bool _record_refs_into_cset;
+  int _worker_i;
+
+public:
+  G1UpdateRSOrPushRefOopClosure(G1CollectedHeap* g1h,
+                                G1RemSet* rs,
+                                OopsInHeapRegionClosure* push_ref_cl,
+                                bool record_refs_into_cset,
+                                int worker_i = 0);
+
+  void set_from(HeapRegion* from) {
+    assert(from != NULL, "from region must be non-NULL");
+    _from = from;
+  }
+
+  bool self_forwarded(oop obj) {
+    bool result = (obj->is_forwarded() && (obj->forwardee()== obj));
+    return result;
+  }
+
+  bool apply_to_weak_ref_discovered_field() { return true; }
+
+  template <class T> void do_oop_nv(T* p);
+  virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
+  virtual void do_oop(oop* p)       { do_oop_nv(p); }
+};
+
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1OOPCLOSURES_HPP
--- a/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp	Fri Feb 03 14:04:59 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -39,7 +39,8 @@
 // perf-critical inner loop.
 #define FILTERINTOCSCLOSURE_DOHISTOGRAMCOUNT 0
 
-template <class T> inline void FilterIntoCSClosure::do_oop_nv(T* p) {
+template <class T>
+inline void FilterIntoCSClosure::do_oop_nv(T* p) {
   T heap_oop = oopDesc::load_heap_oop(p);
   if (!oopDesc::is_null(heap_oop) &&
       _g1->obj_in_cs(oopDesc::decode_heap_oop_not_null(heap_oop))) {
@@ -53,7 +54,8 @@
 
 #define FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT 0
 
-template <class T> inline void FilterOutOfRegionClosure::do_oop_nv(T* p) {
+template <class T>
+inline void FilterOutOfRegionClosure::do_oop_nv(T* p) {
   T heap_oop = oopDesc::load_heap_oop(p);
   if (!oopDesc::is_null(heap_oop)) {
     HeapWord* obj_hw = (HeapWord*)oopDesc::decode_heap_oop_not_null(heap_oop);
@@ -67,7 +69,8 @@
 }
 
 // This closure is applied to the fields of the objects that have just been copied.
-template <class T> inline void G1ParScanClosure::do_oop_nv(T* p) {
+template <class T>
+inline void G1ParScanClosure::do_oop_nv(T* p) {
   T heap_oop = oopDesc::load_heap_oop(p);
 
   if (!oopDesc::is_null(heap_oop)) {
@@ -96,7 +99,8 @@
   }
 }
 
-template <class T> inline void G1ParPushHeapRSClosure::do_oop_nv(T* p) {
+template <class T>
+inline void G1ParPushHeapRSClosure::do_oop_nv(T* p) {
   T heap_oop = oopDesc::load_heap_oop(p);
 
   if (!oopDesc::is_null(heap_oop)) {
@@ -111,7 +115,8 @@
   }
 }
 
-template <class T> inline void G1CMOopClosure::do_oop_nv(T* p) {
+template <class T>
+inline void G1CMOopClosure::do_oop_nv(T* p) {
   assert(_g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
   assert(!_g1h->is_on_master_free_list(
                     _g1h->heap_region_containing((HeapWord*) p)), "invariant");
@@ -125,4 +130,97 @@
   _task->deal_with_reference(obj);
 }
 
+template <class T>
+inline void G1RootRegionScanClosure::do_oop_nv(T* p) {
+  T heap_oop = oopDesc::load_heap_oop(p);
+  if (!oopDesc::is_null(heap_oop)) {
+    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+    HeapRegion* hr = _g1h->heap_region_containing((HeapWord*) obj);
+    if (hr != NULL) {
+      _cm->grayRoot(obj, obj->size(), _worker_id, hr);
+    }
+  }
+}
+
+template <class T>
+inline void G1Mux2Closure::do_oop_nv(T* p) {
+  // Apply first closure; then apply the second.
+  _c1->do_oop(p);
+  _c2->do_oop(p);
+}
+
+template <class T>
+inline void G1TriggerClosure::do_oop_nv(T* p) {
+  // Record that this closure was actually applied (triggered).
+  _triggered = true;
+}
+
+template <class T>
+inline void G1InvokeIfNotTriggeredClosure::do_oop_nv(T* p) {
+  if (!_trigger_cl->triggered()) {
+    _oop_cl->do_oop(p);
+  }
+}
+
+template <class T>
+inline void G1UpdateRSOrPushRefOopClosure::do_oop_nv(T* p) {
+  oop obj = oopDesc::load_decode_heap_oop(p);
+#ifdef ASSERT
+  // can't do because of races
+  // assert(obj == NULL || obj->is_oop(), "expected an oop");
+
+  // Do the safe subset of is_oop
+  if (obj != NULL) {
+#ifdef CHECK_UNHANDLED_OOPS
+    oopDesc* o = obj.obj();
+#else
+    oopDesc* o = obj;
+#endif // CHECK_UNHANDLED_OOPS
+    assert((intptr_t)o % MinObjAlignmentInBytes == 0, "not oop aligned");
+    assert(Universe::heap()->is_in_reserved(obj), "must be in heap");
+  }
+#endif // ASSERT
+
+  assert(_from != NULL, "from region must be non-NULL");
+
+  HeapRegion* to = _g1->heap_region_containing(obj);
+  if (to != NULL && _from != to) {
+    // The _record_refs_into_cset flag is true during the RSet
+    // updating part of an evacuation pause. It is false at all
+    // other times:
+    //  * rebuilding the rembered sets after a full GC
+    //  * during concurrent refinement.
+    //  * updating the remembered sets of regions in the collection
+    //    set in the event of an evacuation failure (when deferred
+    //    updates are enabled).
+
+    if (_record_refs_into_cset && to->in_collection_set()) {
+      // We are recording references that point into the collection
+      // set and this particular reference does exactly that...
+      // If the referenced object has already been forwarded
+      // to itself, we are handling an evacuation failure and
+      // we have already visited/tried to copy this object
+      // there is no need to retry.
+      if (!self_forwarded(obj)) {
+        assert(_push_ref_cl != NULL, "should not be null");
+        // Push the reference in the refs queue of the G1ParScanThreadState
+        // instance for this worker thread.
+        _push_ref_cl->do_oop(p);
+      }
+
+      // Deferred updates to the CSet are either discarded (in the normal case),
+      // or processed (if an evacuation failure occurs) at the end
+      // of the collection.
+      // See G1RemSet::cleanup_after_oops_into_collection_set_do().
+    } else {
+      // We either don't care about pushing references that point into the
+      // collection set (i.e. we're not during an evacuation pause) _or_
+      // the reference doesn't point into the collection set. Either way
+      // we add the reference directly to the RSet of the region containing
+      // the referenced object.
+      _g1_rem_set->par_write_ref(_from, p, _worker_i);
+    }
+  }
+}
+
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1OOPCLOSURES_INLINE_HPP
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Fri Feb 03 14:04:59 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -569,40 +569,26 @@
 
 static IntHistogram out_of_histo(50, 50);
 
-class TriggerClosure : public OopClosure {
-  bool _trigger;
-public:
-  TriggerClosure() : _trigger(false) { }
-  bool value() const { return _trigger; }
-  template <class T> void do_oop_nv(T* p) { _trigger = true; }
-  virtual void do_oop(oop* p)        { do_oop_nv(p); }
-  virtual void do_oop(narrowOop* p)  { do_oop_nv(p); }
-};
+
+G1TriggerClosure::G1TriggerClosure() :
+  _triggered(false) { }
+
+G1InvokeIfNotTriggeredClosure::G1InvokeIfNotTriggeredClosure(G1TriggerClosure* t_cl,
+                                                             OopClosure* oop_cl)  :
+  _trigger_cl(t_cl), _oop_cl(oop_cl) { }
 
-class InvokeIfNotTriggeredClosure: public OopClosure {
-  TriggerClosure* _t;
-  OopClosure* _oc;
-public:
-  InvokeIfNotTriggeredClosure(TriggerClosure* t, OopClosure* oc):
-    _t(t), _oc(oc) { }
-  template <class T> void do_oop_nv(T* p) {
-    if (!_t->value()) _oc->do_oop(p);
-  }
-  virtual void do_oop(oop* p)        { do_oop_nv(p); }
-  virtual void do_oop(narrowOop* p)  { do_oop_nv(p); }
-};
+G1Mux2Closure::G1Mux2Closure(OopClosure *c1, OopClosure *c2) :
+  _c1(c1), _c2(c2) { }
 
-class Mux2Closure : public OopClosure {
-  OopClosure* _c1;
-  OopClosure* _c2;
-public:
-  Mux2Closure(OopClosure *c1, OopClosure *c2) : _c1(c1), _c2(c2) { }
-  template <class T> void do_oop_nv(T* p) {
-    _c1->do_oop(p); _c2->do_oop(p);
-  }
-  virtual void do_oop(oop* p)        { do_oop_nv(p); }
-  virtual void do_oop(narrowOop* p)  { do_oop_nv(p); }
-};
+G1UpdateRSOrPushRefOopClosure::
+G1UpdateRSOrPushRefOopClosure(G1CollectedHeap* g1h,
+                              G1RemSet* rs,
+                              OopsInHeapRegionClosure* push_ref_cl,
+                              bool record_refs_into_cset,
+                              int worker_i) :
+  _g1(g1h), _g1_rem_set(rs), _from(NULL),
+  _record_refs_into_cset(record_refs_into_cset),
+  _push_ref_cl(push_ref_cl), _worker_i(worker_i) { }
 
 bool G1RemSet::concurrentRefineOneCard_impl(jbyte* card_ptr, int worker_i,
                                                    bool check_for_refs_into_cset) {
@@ -629,17 +615,17 @@
     assert((size_t)worker_i < n_workers(), "index of worker larger than _cset_rs_update_cl[].length");
     oops_in_heap_closure = _cset_rs_update_cl[worker_i];
   }
-  UpdateRSOrPushRefOopClosure update_rs_oop_cl(_g1,
-                                               _g1->g1_rem_set(),
-                                               oops_in_heap_closure,
-                                               check_for_refs_into_cset,
-                                               worker_i);
+  G1UpdateRSOrPushRefOopClosure update_rs_oop_cl(_g1,
+                                                 _g1->g1_rem_set(),
+                                                 oops_in_heap_closure,
+                                                 check_for_refs_into_cset,
+                                                 worker_i);
   update_rs_oop_cl.set_from(r);
 
-  TriggerClosure trigger_cl;
+  G1TriggerClosure trigger_cl;
   FilterIntoCSClosure into_cs_cl(NULL, _g1, &trigger_cl);
-  InvokeIfNotTriggeredClosure invoke_cl(&trigger_cl, &into_cs_cl);
-  Mux2Closure mux(&invoke_cl, &update_rs_oop_cl);
+  G1InvokeIfNotTriggeredClosure invoke_cl(&trigger_cl, &into_cs_cl);
+  G1Mux2Closure mux(&invoke_cl, &update_rs_oop_cl);
 
   FilterOutOfRegionClosure filter_then_update_rs_oop_cl(r,
                         (check_for_refs_into_cset ?
@@ -688,7 +674,7 @@
     _conc_refine_cards++;
   }
 
-  return trigger_cl.value();
+  return trigger_cl.triggered();
 }
 
 bool G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i,
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Fri Feb 03 14:04:59 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -191,44 +191,5 @@
   virtual void do_oop(      oop* p) { do_oop_work(p); }
 };
 
-class UpdateRSOrPushRefOopClosure: public OopClosure {
-  G1CollectedHeap* _g1;
-  G1RemSet* _g1_rem_set;
-  HeapRegion* _from;
-  OopsInHeapRegionClosure* _push_ref_cl;
-  bool _record_refs_into_cset;
-  int _worker_i;
-
-  template <class T> void do_oop_work(T* p);
-
-public:
-  UpdateRSOrPushRefOopClosure(G1CollectedHeap* g1h,
-                              G1RemSet* rs,
-                              OopsInHeapRegionClosure* push_ref_cl,
-                              bool record_refs_into_cset,
-                              int worker_i = 0) :
-    _g1(g1h),
-    _g1_rem_set(rs),
-    _from(NULL),
-    _record_refs_into_cset(record_refs_into_cset),
-    _push_ref_cl(push_ref_cl),
-    _worker_i(worker_i) { }
-
-  void set_from(HeapRegion* from) {
-    assert(from != NULL, "from region must be non-NULL");
-    _from = from;
-  }
-
-  bool self_forwarded(oop obj) {
-    bool result = (obj->is_forwarded() && (obj->forwardee()== obj));
-    return result;
-  }
-
-  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
-  virtual void do_oop(oop* p)       { do_oop_work(p); }
-
-  bool apply_to_weak_ref_discovered_field() { return true; }
-};
-
 
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1REMSET_HPP
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp	Fri Feb 03 14:04:59 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -85,66 +85,4 @@
   }
 }
 
-template <class T>
-inline void UpdateRSOrPushRefOopClosure::do_oop_work(T* p) {
-  oop obj = oopDesc::load_decode_heap_oop(p);
-#ifdef ASSERT
-  // can't do because of races
-  // assert(obj == NULL || obj->is_oop(), "expected an oop");
-
-  // Do the safe subset of is_oop
-  if (obj != NULL) {
-#ifdef CHECK_UNHANDLED_OOPS
-    oopDesc* o = obj.obj();
-#else
-    oopDesc* o = obj;
-#endif // CHECK_UNHANDLED_OOPS
-    assert((intptr_t)o % MinObjAlignmentInBytes == 0, "not oop aligned");
-    assert(Universe::heap()->is_in_reserved(obj), "must be in heap");
-  }
-#endif // ASSERT
-
-  assert(_from != NULL, "from region must be non-NULL");
-
-  HeapRegion* to = _g1->heap_region_containing(obj);
-  if (to != NULL && _from != to) {
-    // The _record_refs_into_cset flag is true during the RSet
-    // updating part of an evacuation pause. It is false at all
-    // other times:
-    //  * rebuilding the rembered sets after a full GC
-    //  * during concurrent refinement.
-    //  * updating the remembered sets of regions in the collection
-    //    set in the event of an evacuation failure (when deferred
-    //    updates are enabled).
-
-    if (_record_refs_into_cset && to->in_collection_set()) {
-      // We are recording references that point into the collection
-      // set and this particular reference does exactly that...
-      // If the referenced object has already been forwarded
-      // to itself, we are handling an evacuation failure and
-      // we have already visited/tried to copy this object
-      // there is no need to retry.
-      if (!self_forwarded(obj)) {
-        assert(_push_ref_cl != NULL, "should not be null");
-        // Push the reference in the refs queue of the G1ParScanThreadState
-        // instance for this worker thread.
-        _push_ref_cl->do_oop(p);
-      }
-
-      // Deferred updates to the CSet are either discarded (in the normal case),
-      // or processed (if an evacuation failure occurs) at the end
-      // of the collection.
-      // See G1RemSet::cleanup_after_oops_into_collection_set_do().
-    } else {
-      // We either don't care about pushing references that point into the
-      // collection set (i.e. we're not during an evacuation pause) _or_
-      // the reference doesn't point into the collection set. Either way
-      // we add the reference directly to the RSet of the region containing
-      // the referenced object.
-      _g1_rem_set->par_write_ref(_from, p, _worker_i);
-    }
-  }
-}
-
-
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1REMSET_INLINE_HPP
--- a/hotspot/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp	Fri Feb 03 14:04:59 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -32,12 +32,14 @@
 
 // Forward declarations.
 enum G1Barrier {
-  G1BarrierNone, G1BarrierRS, G1BarrierEvac
+  G1BarrierNone,
+  G1BarrierRS,
+  G1BarrierEvac
 };
 
-template<bool do_gen_barrier, G1Barrier barrier,
-         bool do_mark_object>
+template<bool do_gen_barrier, G1Barrier barrier, bool do_mark_object>
 class G1ParCopyClosure;
+
 class G1ParScanClosure;
 class G1ParPushHeapRSClosure;
 
@@ -46,6 +48,13 @@
 class FilterIntoCSClosure;
 class FilterOutOfRegionClosure;
 class G1CMOopClosure;
+class G1RootRegionScanClosure;
+
+// Specialized oop closures from g1RemSet.cpp
+class G1Mux2Closure;
+class G1TriggerClosure;
+class G1InvokeIfNotTriggeredClosure;
+class G1UpdateRSOrPushRefOopClosure;
 
 #ifdef FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES
 #error "FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES already defined."
@@ -57,7 +66,12 @@
       f(G1ParPushHeapRSClosure,_nv)                     \
       f(FilterIntoCSClosure,_nv)                        \
       f(FilterOutOfRegionClosure,_nv)                   \
-      f(G1CMOopClosure,_nv)
+      f(G1CMOopClosure,_nv)                             \
+      f(G1RootRegionScanClosure,_nv)                    \
+      f(G1Mux2Closure,_nv)                              \
+      f(G1TriggerClosure,_nv)                           \
+      f(G1InvokeIfNotTriggeredClosure,_nv)              \
+      f(G1UpdateRSOrPushRefOopClosure,_nv)
 
 #ifdef FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES
 #error "FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES already defined."
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp	Fri Feb 03 14:04:59 2012 -0500
@@ -659,7 +659,7 @@
   // If we're within a stop-world GC, then we might look at a card in a
   // GC alloc region that extends onto a GC LAB, which may not be
   // parseable.  Stop such at the "saved_mark" of the region.
-  if (G1CollectedHeap::heap()->is_gc_active()) {
+  if (g1h->is_gc_active()) {
     mr = mr.intersection(used_region_at_save_marks());
   } else {
     mr = mr.intersection(used_region());
@@ -688,53 +688,63 @@
     OrderAccess::storeload();
   }
 
+  // Cache the boundaries of the memory region in some const locals
+  HeapWord* const start = mr.start();
+  HeapWord* const end = mr.end();
+
   // We used to use "block_start_careful" here.  But we're actually happy
   // to update the BOT while we do this...
-  HeapWord* cur = block_start(mr.start());
-  assert(cur <= mr.start(), "Postcondition");
+  HeapWord* cur = block_start(start);
+  assert(cur <= start, "Postcondition");
+
+  oop obj;
 
-  while (cur <= mr.start()) {
-    if (oop(cur)->klass_or_null() == NULL) {
+  HeapWord* next = cur;
+  while (next <= start) {
+    cur = next;
+    obj = oop(cur);
+    if (obj->klass_or_null() == NULL) {
       // Ran into an unparseable point.
       return cur;
     }
     // Otherwise...
-    int sz = oop(cur)->size();
-    if (cur + sz > mr.start()) break;
-    // Otherwise, go on.
-    cur = cur + sz;
+    next = (cur + obj->size());
   }
-  oop obj;
-  obj = oop(cur);
-  // If we finish this loop...
-  assert(cur <= mr.start()
-         && obj->klass_or_null() != NULL
-         && cur + obj->size() > mr.start(),
+
+  // If we finish the above loop...We have a parseable object that
+  // begins on or before the start of the memory region, and ends
+  // inside or spans the entire region.
+
+  assert(obj == oop(cur), "sanity");
+  assert(cur <= start &&
+         obj->klass_or_null() != NULL &&
+         (cur + obj->size()) > start,
          "Loop postcondition");
+
   if (!g1h->is_obj_dead(obj)) {
     obj->oop_iterate(cl, mr);
   }
 
-  HeapWord* next;
-  while (cur < mr.end()) {
+  while (cur < end) {
     obj = oop(cur);
     if (obj->klass_or_null() == NULL) {
       // Ran into an unparseable point.
       return cur;
     };
+
     // Otherwise:
     next = (cur + obj->size());
+
     if (!g1h->is_obj_dead(obj)) {
-      if (next < mr.end()) {
+      if (next < end || !obj->is_objArray()) {
+        // This object either does not span the MemRegion
+        // boundary, or if it does it's not an array.
+        // Apply closure to whole object.
         obj->oop_iterate(cl);
       } else {
-        // this obj spans the boundary.  If it's an array, stop at the
-        // boundary.
-        if (obj->is_objArray()) {
-          obj->oop_iterate(cl, mr);
-        } else {
-          obj->oop_iterate(cl);
-        }
+        // This obj is an array that spans the boundary.
+        // Stop at the boundary.
+        obj->oop_iterate(cl, mr);
       }
     }
     cur = next;
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp	Fri Feb 03 14:04:59 2012 -0500
@@ -374,7 +374,9 @@
     ParVerifyClaimValue        = 4,
     RebuildRSClaimValue        = 5,
     CompleteMarkCSetClaimValue = 6,
-    ParEvacFailureClaimValue   = 7
+    ParEvacFailureClaimValue   = 7,
+    AggregateCountClaimValue   = 8,
+    VerifyCountClaimValue      = 9
   };
 
   inline HeapWord* par_allocate_no_bot_updates(size_t word_size) {
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp	Fri Feb 03 14:04:59 2012 -0500
@@ -72,10 +72,11 @@
 }
 
 inline void HeapRegion::note_start_of_copying(bool during_initial_mark) {
-  if (during_initial_mark) {
-    if (is_survivor()) {
-      assert(false, "should not allocate survivors during IM");
-    } else {
+  if (is_survivor()) {
+    // This is how we always allocate survivors.
+    assert(_next_top_at_mark_start == bottom(), "invariant");
+  } else {
+    if (during_initial_mark) {
       // During initial-mark we'll explicitly mark any objects on old
       // regions that are pointed to by roots. Given that explicit
       // marks only make sense under NTAMS it'd be nice if we could
@@ -84,11 +85,6 @@
       // NTAMS to the end of the region so all marks will be below
       // NTAMS. We'll set it to the actual top when we retire this region.
       _next_top_at_mark_start = end();
-    }
-  } else {
-    if (is_survivor()) {
-      // This is how we always allocate survivors.
-      assert(_next_top_at_mark_start == bottom(), "invariant");
     } else {
       // We could have re-used this old region as to-space over a
       // couple of GCs since the start of the concurrent marking
@@ -101,19 +97,15 @@
 }
 
 inline void HeapRegion::note_end_of_copying(bool during_initial_mark) {
-  if (during_initial_mark) {
-    if (is_survivor()) {
-      assert(false, "should not allocate survivors during IM");
-    } else {
+  if (is_survivor()) {
+    // This is how we always allocate survivors.
+    assert(_next_top_at_mark_start == bottom(), "invariant");
+  } else {
+    if (during_initial_mark) {
       // See the comment for note_start_of_copying() for the details
       // on this.
       assert(_next_top_at_mark_start == end(), "pre-condition");
       _next_top_at_mark_start = top();
-    }
-  } else {
-    if (is_survivor()) {
-      // This is how we always allocate survivors.
-      assert(_next_top_at_mark_start == bottom(), "invariant");
     } else {
       // See the comment for note_start_of_copying() for the details
       // on this.
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegionSet.hpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionSet.hpp	Fri Feb 03 14:04:59 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -59,6 +59,7 @@
 class HeapRegionSetBase VALUE_OBJ_CLASS_SPEC {
   friend class hrs_ext_msg;
   friend class HRSPhaseSetter;
+  friend class VMStructs;
 
 protected:
   static size_t calculate_region_num(HeapRegion* hr);
--- a/hotspot/src/share/vm/gc_implementation/g1/vmStructs_g1.hpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/vmStructs_g1.hpp	Fri Feb 03 14:04:59 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -40,6 +40,8 @@
   nonstatic_field(G1CollectedHeap, _g1_committed,       MemRegion)            \
   nonstatic_field(G1CollectedHeap, _summary_bytes_used, size_t)               \
   nonstatic_field(G1CollectedHeap, _g1mm,               G1MonitoringSupport*) \
+  nonstatic_field(G1CollectedHeap, _old_set,            HeapRegionSetBase)    \
+  nonstatic_field(G1CollectedHeap, _humongous_set,      HeapRegionSetBase)    \
                                                                               \
   nonstatic_field(G1MonitoringSupport, _eden_committed,     size_t)           \
   nonstatic_field(G1MonitoringSupport, _eden_used,          size_t)           \
@@ -47,6 +49,10 @@
   nonstatic_field(G1MonitoringSupport, _survivor_used,      size_t)           \
   nonstatic_field(G1MonitoringSupport, _old_committed,      size_t)           \
   nonstatic_field(G1MonitoringSupport, _old_used,           size_t)           \
+                                                                              \
+  nonstatic_field(HeapRegionSetBase,   _length,             size_t)           \
+  nonstatic_field(HeapRegionSetBase,   _region_num,         size_t)           \
+  nonstatic_field(HeapRegionSetBase,   _total_used_bytes,   size_t)           \
 
 
 #define VM_TYPES_G1(declare_type, declare_toplevel_type)                      \
@@ -55,6 +61,7 @@
                                                                               \
   declare_type(HeapRegion, ContiguousSpace)                                   \
   declare_toplevel_type(HeapRegionSeq)                                        \
+  declare_toplevel_type(HeapRegionSetBase)                                    \
   declare_toplevel_type(G1MonitoringSupport)                                  \
                                                                               \
   declare_toplevel_type(G1CollectedHeap*)                                     \
--- a/hotspot/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp	Fri Feb 03 14:04:59 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -74,8 +74,9 @@
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   assert(!_should_initiate_conc_mark ||
   ((_gc_cause == GCCause::_gc_locker && GCLockerInvokesConcurrent) ||
-   (_gc_cause == GCCause::_java_lang_system_gc && ExplicitGCInvokesConcurrent)),
-         "only a GC locker or a System.gc() induced GC should start a cycle");
+   (_gc_cause == GCCause::_java_lang_system_gc && ExplicitGCInvokesConcurrent) ||
+    _gc_cause == GCCause::_g1_humongous_allocation),
+         "only a GC locker, a System.gc() or a hum allocation induced GC should start a cycle");
 
   if (_word_size > 0) {
     // An allocation has been requested. So, try to do that first.
--- a/hotspot/src/share/vm/gc_interface/gcCause.cpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/gc_interface/gcCause.cpp	Fri Feb 03 14:04:59 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -84,6 +84,9 @@
     case _g1_inc_collection_pause:
       return "G1 Evacuation Pause";
 
+    case _g1_humongous_allocation:
+      return "G1 Humongous Allocation";
+
     case _last_ditch_collection:
       return "Last ditch collection";
 
--- a/hotspot/src/share/vm/gc_interface/gcCause.hpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/gc_interface/gcCause.hpp	Fri Feb 03 14:04:59 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -66,6 +66,7 @@
     _adaptive_size_policy,
 
     _g1_inc_collection_pause,
+    _g1_humongous_allocation,
 
     _last_ditch_collection,
     _last_gc_cause
--- a/hotspot/src/share/vm/interpreter/interpreterRuntime.cpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/interpreter/interpreterRuntime.cpp	Fri Feb 03 14:04:59 2012 -0500
@@ -859,7 +859,9 @@
   const int branch_bci = branch_bcp != NULL ? method->bci_from(branch_bcp) : InvocationEntryBci;
   const int bci = branch_bcp != NULL ? method->bci_from(fr.interpreter_frame_bcp()) : InvocationEntryBci;
 
+  assert(!HAS_PENDING_EXCEPTION, "Should not have any exceptions pending");
   nmethod* osr_nm = CompilationPolicy::policy()->event(method, method, branch_bci, bci, CompLevel_none, NULL, thread);
+  assert(!HAS_PENDING_EXCEPTION, "Event handler should not throw any exceptions");
 
   if (osr_nm != NULL) {
     // We may need to do on-stack replacement which requires that no
--- a/hotspot/src/share/vm/opto/block.hpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/opto/block.hpp	Fri Feb 03 14:04:59 2012 -0500
@@ -284,13 +284,13 @@
   // helper function that adds caller save registers to MachProjNode
   void add_call_kills(MachProjNode *proj, RegMask& regs, const char* save_policy, bool exclude_soe);
   // Schedule a call next in the block
-  uint sched_call(Matcher &matcher, Block_Array &bbs, uint node_cnt, Node_List &worklist, int *ready_cnt, MachCallNode *mcall, VectorSet &next_call);
+  uint sched_call(Matcher &matcher, Block_Array &bbs, uint node_cnt, Node_List &worklist, GrowableArray<int> &ready_cnt, MachCallNode *mcall, VectorSet &next_call);
 
   // Perform basic-block local scheduling
-  Node *select(PhaseCFG *cfg, Node_List &worklist, int *ready_cnt, VectorSet &next_call, uint sched_slot);
+  Node *select(PhaseCFG *cfg, Node_List &worklist, GrowableArray<int> &ready_cnt, VectorSet &next_call, uint sched_slot);
   void set_next_call( Node *n, VectorSet &next_call, Block_Array &bbs );
   void needed_for_next_call(Node *this_call, VectorSet &next_call, Block_Array &bbs);
-  bool schedule_local(PhaseCFG *cfg, Matcher &m, int *ready_cnt, VectorSet &next_call);
+  bool schedule_local(PhaseCFG *cfg, Matcher &m, GrowableArray<int> &ready_cnt, VectorSet &next_call);
   // Cleanup if any code lands between a Call and his Catch
   void call_catch_cleanup(Block_Array &bbs);
   // Detect implicit-null-check opportunities.  Basically, find NULL checks
--- a/hotspot/src/share/vm/opto/gcm.cpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/opto/gcm.cpp	Fri Feb 03 14:04:59 2012 -0500
@@ -1344,8 +1344,8 @@
 
   // Schedule locally.  Right now a simple topological sort.
   // Later, do a real latency aware scheduler.
-  int *ready_cnt = NEW_RESOURCE_ARRAY(int,C->unique());
-  memset( ready_cnt, -1, C->unique() * sizeof(int) );
+  uint max_idx = C->unique();
+  GrowableArray<int> ready_cnt(max_idx, max_idx, -1);
   visited.Clear();
   for (i = 0; i < _num_blocks; i++) {
     if (!_blocks[i]->schedule_local(this, matcher, ready_cnt, visited)) {
--- a/hotspot/src/share/vm/opto/lcm.cpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/opto/lcm.cpp	Fri Feb 03 14:04:59 2012 -0500
@@ -404,7 +404,7 @@
 // remaining cases (most), choose the instruction with the greatest latency
 // (that is, the most number of pseudo-cycles required to the end of the
 // routine). If there is a tie, choose the instruction with the most inputs.
-Node *Block::select(PhaseCFG *cfg, Node_List &worklist, int *ready_cnt, VectorSet &next_call, uint sched_slot) {
+Node *Block::select(PhaseCFG *cfg, Node_List &worklist, GrowableArray<int> &ready_cnt, VectorSet &next_call, uint sched_slot) {
 
   // If only a single entry on the stack, use it
   uint cnt = worklist.size();
@@ -465,7 +465,7 @@
 
         // More than this instruction pending for successor to be ready,
         // don't choose this if other opportunities are ready
-        if (ready_cnt[use->_idx] > 1)
+        if (ready_cnt.at(use->_idx) > 1)
           n_choice = 1;
       }
 
@@ -565,7 +565,7 @@
 
 
 //------------------------------sched_call-------------------------------------
-uint Block::sched_call( Matcher &matcher, Block_Array &bbs, uint node_cnt, Node_List &worklist, int *ready_cnt, MachCallNode *mcall, VectorSet &next_call ) {
+uint Block::sched_call( Matcher &matcher, Block_Array &bbs, uint node_cnt, Node_List &worklist, GrowableArray<int> &ready_cnt, MachCallNode *mcall, VectorSet &next_call ) {
   RegMask regs;
 
   // Schedule all the users of the call right now.  All the users are
@@ -574,8 +574,9 @@
   for (DUIterator_Fast imax, i = mcall->fast_outs(imax); i < imax; i++) {
     Node* n = mcall->fast_out(i);
     assert( n->is_MachProj(), "" );
-    --ready_cnt[n->_idx];
-    assert( !ready_cnt[n->_idx], "" );
+    int n_cnt = ready_cnt.at(n->_idx)-1;
+    ready_cnt.at_put(n->_idx, n_cnt);
+    assert( n_cnt == 0, "" );
     // Schedule next to call
     _nodes.map(node_cnt++, n);
     // Collect defined registers
@@ -590,7 +591,9 @@
       Node* m = n->fast_out(j); // Get user
       if( bbs[m->_idx] != this ) continue;
       if( m->is_Phi() ) continue;
-      if( !--ready_cnt[m->_idx] )
+      int m_cnt = ready_cnt.at(m->_idx)-1;
+      ready_cnt.at_put(m->_idx, m_cnt);
+      if( m_cnt == 0 )
         worklist.push(m);
     }
 
@@ -655,7 +658,7 @@
 
 //------------------------------schedule_local---------------------------------
 // Topological sort within a block.  Someday become a real scheduler.
-bool Block::schedule_local(PhaseCFG *cfg, Matcher &matcher, int *ready_cnt, VectorSet &next_call) {
+bool Block::schedule_local(PhaseCFG *cfg, Matcher &matcher, GrowableArray<int> &ready_cnt, VectorSet &next_call) {
   // Already "sorted" are the block start Node (as the first entry), and
   // the block-ending Node and any trailing control projections.  We leave
   // these alone.  PhiNodes and ParmNodes are made to follow the block start
@@ -695,7 +698,7 @@
         if( m && cfg->_bbs[m->_idx] == this && !m->is_top() )
           local++;              // One more block-local input
       }
-      ready_cnt[n->_idx] = local; // Count em up
+      ready_cnt.at_put(n->_idx, local); // Count em up
 
 #ifdef ASSERT
       if( UseConcMarkSweepGC || UseG1GC ) {
@@ -729,7 +732,7 @@
     }
   }
   for(uint i2=i; i2<_nodes.size(); i2++ ) // Trailing guys get zapped count
-    ready_cnt[_nodes[i2]->_idx] = 0;
+    ready_cnt.at_put(_nodes[i2]->_idx, 0);
 
   // All the prescheduled guys do not hold back internal nodes
   uint i3;
@@ -737,8 +740,10 @@
     Node *n = _nodes[i3];       // Get pre-scheduled
     for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
       Node* m = n->fast_out(j);
-      if( cfg->_bbs[m->_idx] ==this ) // Local-block user
-        ready_cnt[m->_idx]--;   // Fix ready count
+      if( cfg->_bbs[m->_idx] ==this ) { // Local-block user
+        int m_cnt = ready_cnt.at(m->_idx)-1;
+        ready_cnt.at_put(m->_idx, m_cnt);   // Fix ready count
+      }
     }
   }
 
@@ -747,7 +752,7 @@
   Node_List worklist;
   for(uint i4=i3; i4<node_cnt; i4++ ) {    // Put ready guys on worklist
     Node *m = _nodes[i4];
-    if( !ready_cnt[m->_idx] ) {   // Zero ready count?
+    if( !ready_cnt.at(m->_idx) ) {   // Zero ready count?
       if (m->is_iteratively_computed()) {
         // Push induction variable increments last to allow other uses
         // of the phi to be scheduled first. The select() method breaks
@@ -775,14 +780,14 @@
       for (uint j=0; j<_nodes.size(); j++) {
         Node     *n = _nodes[j];
         int     idx = n->_idx;
-        tty->print("#   ready cnt:%3d  ", ready_cnt[idx]);
+        tty->print("#   ready cnt:%3d  ", ready_cnt.at(idx));
         tty->print("latency:%3d  ", cfg->_node_latency->at_grow(idx));
         tty->print("%4d: %s\n", idx, n->Name());
       }
     }
 #endif
 
-  uint max_idx = matcher.C->unique();
+  uint max_idx = (uint)ready_cnt.length();
   // Pull from worklist and schedule
   while( worklist.size() ) {    // Worklist is not ready
 
@@ -840,11 +845,13 @@
       Node* m = n->fast_out(i5); // Get user
       if( cfg->_bbs[m->_idx] != this ) continue;
       if( m->is_Phi() ) continue;
-      if (m->_idx > max_idx) { // new node, skip it
+      if (m->_idx >= max_idx) { // new node, skip it
         assert(m->is_MachProj() && n->is_Mach() && n->as_Mach()->has_call(), "unexpected node types");
         continue;
       }
-      if( !--ready_cnt[m->_idx] )
+      int m_cnt = ready_cnt.at(m->_idx)-1;
+      ready_cnt.at_put(m->_idx, m_cnt);
+      if( m_cnt == 0 )
         worklist.push(m);
     }
   }
--- a/hotspot/src/share/vm/opto/memnode.cpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/opto/memnode.cpp	Fri Feb 03 14:04:59 2012 -0500
@@ -1718,8 +1718,10 @@
   bool is_instance = (tinst != NULL) && tinst->is_known_instance_field();
   if (ReduceFieldZeroing || is_instance) {
     Node* value = can_see_stored_value(mem,phase);
-    if (value != NULL && value->is_Con())
+    if (value != NULL && value->is_Con()) {
+      assert(value->bottom_type()->higher_equal(_type),"sanity");
       return value->bottom_type();
+    }
   }
 
   if (is_instance) {
@@ -1759,6 +1761,20 @@
   return LoadNode::Ideal(phase, can_reshape);
 }
 
+const Type* LoadBNode::Value(PhaseTransform *phase) const {
+  Node* mem = in(MemNode::Memory);
+  Node* value = can_see_stored_value(mem,phase);
+  if (value != NULL && value->is_Con() &&
+      !value->bottom_type()->higher_equal(_type)) {
+    // If the input to the store does not fit with the load's result type,
+    // it must be truncated. We can't delay until Ideal call since
+    // a singleton Value is needed for split_thru_phi optimization.
+    int con = value->get_int();
+    return TypeInt::make((con << 24) >> 24);
+  }
+  return LoadNode::Value(phase);
+}
+
 //--------------------------LoadUBNode::Ideal-------------------------------------
 //
 //  If the previous store is to the same address as this load,
@@ -1775,6 +1791,20 @@
   return LoadNode::Ideal(phase, can_reshape);
 }
 
+const Type* LoadUBNode::Value(PhaseTransform *phase) const {
+  Node* mem = in(MemNode::Memory);
+  Node* value = can_see_stored_value(mem,phase);
+  if (value != NULL && value->is_Con() &&
+      !value->bottom_type()->higher_equal(_type)) {
+    // If the input to the store does not fit with the load's result type,
+    // it must be truncated. We can't delay until Ideal call since
+    // a singleton Value is needed for split_thru_phi optimization.
+    int con = value->get_int();
+    return TypeInt::make(con & 0xFF);
+  }
+  return LoadNode::Value(phase);
+}
+
 //--------------------------LoadUSNode::Ideal-------------------------------------
 //
 //  If the previous store is to the same address as this load,
@@ -1791,6 +1821,20 @@
   return LoadNode::Ideal(phase, can_reshape);
 }
 
+const Type* LoadUSNode::Value(PhaseTransform *phase) const {
+  Node* mem = in(MemNode::Memory);
+  Node* value = can_see_stored_value(mem,phase);
+  if (value != NULL && value->is_Con() &&
+      !value->bottom_type()->higher_equal(_type)) {
+    // If the input to the store does not fit with the load's result type,
+    // it must be truncated. We can't delay until Ideal call since
+    // a singleton Value is needed for split_thru_phi optimization.
+    int con = value->get_int();
+    return TypeInt::make(con & 0xFFFF);
+  }
+  return LoadNode::Value(phase);
+}
+
 //--------------------------LoadSNode::Ideal--------------------------------------
 //
 //  If the previous store is to the same address as this load,
@@ -1809,6 +1853,20 @@
   return LoadNode::Ideal(phase, can_reshape);
 }
 
+const Type* LoadSNode::Value(PhaseTransform *phase) const {
+  Node* mem = in(MemNode::Memory);
+  Node* value = can_see_stored_value(mem,phase);
+  if (value != NULL && value->is_Con() &&
+      !value->bottom_type()->higher_equal(_type)) {
+    // If the input to the store does not fit with the load's result type,
+    // it must be truncated. We can't delay until Ideal call since
+    // a singleton Value is needed for split_thru_phi optimization.
+    int con = value->get_int();
+    return TypeInt::make((con << 16) >> 16);
+  }
+  return LoadNode::Value(phase);
+}
+
 //=============================================================================
 //----------------------------LoadKlassNode::make------------------------------
 // Polymorphic factory method:
--- a/hotspot/src/share/vm/opto/memnode.hpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/opto/memnode.hpp	Fri Feb 03 14:04:59 2012 -0500
@@ -215,6 +215,7 @@
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegI; }
   virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+  virtual const Type *Value(PhaseTransform *phase) const;
   virtual int store_Opcode() const { return Op_StoreB; }
   virtual BasicType memory_type() const { return T_BYTE; }
 };
@@ -228,6 +229,7 @@
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegI; }
   virtual Node* Ideal(PhaseGVN *phase, bool can_reshape);
+  virtual const Type *Value(PhaseTransform *phase) const;
   virtual int store_Opcode() const { return Op_StoreB; }
   virtual BasicType memory_type() const { return T_BYTE; }
 };
@@ -241,10 +243,25 @@
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegI; }
   virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+  virtual const Type *Value(PhaseTransform *phase) const;
   virtual int store_Opcode() const { return Op_StoreC; }
   virtual BasicType memory_type() const { return T_CHAR; }
 };
 
+//------------------------------LoadSNode--------------------------------------
+// Load a short (16bits signed) from memory
+class LoadSNode : public LoadNode {
+public:
+  LoadSNode( Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti = TypeInt::SHORT )
+    : LoadNode(c,mem,adr,at,ti) {}
+  virtual int Opcode() const;
+  virtual uint ideal_reg() const { return Op_RegI; }
+  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+  virtual const Type *Value(PhaseTransform *phase) const;
+  virtual int store_Opcode() const { return Op_StoreC; }
+  virtual BasicType memory_type() const { return T_SHORT; }
+};
+
 //------------------------------LoadINode--------------------------------------
 // Load an integer from memory
 class LoadINode : public LoadNode {
@@ -433,19 +450,6 @@
 };
 
 
-//------------------------------LoadSNode--------------------------------------
-// Load a short (16bits signed) from memory
-class LoadSNode : public LoadNode {
-public:
-  LoadSNode( Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti = TypeInt::SHORT )
-    : LoadNode(c,mem,adr,at,ti) {}
-  virtual int Opcode() const;
-  virtual uint ideal_reg() const { return Op_RegI; }
-  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
-  virtual int store_Opcode() const { return Op_StoreC; }
-  virtual BasicType memory_type() const { return T_SHORT; }
-};
-
 //------------------------------StoreNode--------------------------------------
 // Store value; requires Store, Address and Value
 class StoreNode : public MemNode {
--- a/hotspot/src/share/vm/opto/parseHelper.cpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/opto/parseHelper.cpp	Fri Feb 03 14:04:59 2012 -0500
@@ -71,14 +71,14 @@
   // Throw uncommon trap if class is not loaded or the value we are casting
   // _from_ is not loaded, and value is not null.  If the value _is_ NULL,
   // then the checkcast does nothing.
-  const TypeInstPtr *tp = _gvn.type(obj)->isa_instptr();
-  if (!will_link || (tp && !tp->is_loaded())) {
+  const TypeOopPtr *tp = _gvn.type(obj)->isa_oopptr();
+  if (!will_link || (tp && tp->klass() && !tp->klass()->is_loaded())) {
     if (C->log() != NULL) {
       if (!will_link) {
         C->log()->elem("assert_null reason='checkcast' klass='%d'",
                        C->log()->identify(klass));
       }
-      if (tp && !tp->is_loaded()) {
+      if (tp && tp->klass() && !tp->klass()->is_loaded()) {
         // %%% Cannot happen?
         C->log()->elem("assert_null reason='checkcast source' klass='%d'",
                        C->log()->identify(tp->klass()));
--- a/hotspot/src/share/vm/runtime/advancedThresholdPolicy.cpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/runtime/advancedThresholdPolicy.cpp	Fri Feb 03 14:04:59 2012 -0500
@@ -271,13 +271,10 @@
 }
 
 // Create MDO if necessary.
-void AdvancedThresholdPolicy::create_mdo(methodHandle mh, TRAPS) {
+void AdvancedThresholdPolicy::create_mdo(methodHandle mh, JavaThread* THREAD) {
   if (mh->is_native() || mh->is_abstract() || mh->is_accessor()) return;
   if (mh->method_data() == NULL) {
-    methodOopDesc::build_interpreter_method_data(mh, THREAD);
-    if (HAS_PENDING_EXCEPTION) {
-      CLEAR_PENDING_EXCEPTION;
-    }
+    methodOopDesc::build_interpreter_method_data(mh, CHECK_AND_CLEAR);
   }
 }
 
@@ -426,22 +423,22 @@
 }
 
 // Update the rate and submit compile
-void AdvancedThresholdPolicy::submit_compile(methodHandle mh, int bci, CompLevel level, TRAPS) {
+void AdvancedThresholdPolicy::submit_compile(methodHandle mh, int bci, CompLevel level, JavaThread* thread) {
   int hot_count = (bci == InvocationEntryBci) ? mh->invocation_count() : mh->backedge_count();
   update_rate(os::javaTimeMillis(), mh());
-  CompileBroker::compile_method(mh, bci, level, mh, hot_count, "tiered", THREAD);
+  CompileBroker::compile_method(mh, bci, level, mh, hot_count, "tiered", thread);
 }
 
 // Handle the invocation event.
 void AdvancedThresholdPolicy::method_invocation_event(methodHandle mh, methodHandle imh,
-                                                      CompLevel level, nmethod* nm, TRAPS) {
+                                                      CompLevel level, nmethod* nm, JavaThread* thread) {
   if (should_create_mdo(mh(), level)) {
-    create_mdo(mh, THREAD);
+    create_mdo(mh, thread);
   }
   if (is_compilation_enabled() && !CompileBroker::compilation_is_in_queue(mh, InvocationEntryBci)) {
     CompLevel next_level = call_event(mh(), level);
     if (next_level != level) {
-      compile(mh, InvocationEntryBci, next_level, THREAD);
+      compile(mh, InvocationEntryBci, next_level, thread);
     }
   }
 }
@@ -449,13 +446,13 @@
 // Handle the back branch event. Notice that we can compile the method
 // with a regular entry from here.
 void AdvancedThresholdPolicy::method_back_branch_event(methodHandle mh, methodHandle imh,
-                                                       int bci, CompLevel level, nmethod* nm, TRAPS) {
+                                                       int bci, CompLevel level, nmethod* nm, JavaThread* thread) {
   if (should_create_mdo(mh(), level)) {
-    create_mdo(mh, THREAD);
+    create_mdo(mh, thread);
   }
   // Check if MDO should be created for the inlined method
   if (should_create_mdo(imh(), level)) {
-    create_mdo(imh, THREAD);
+    create_mdo(imh, thread);
   }
 
   if (is_compilation_enabled()) {
@@ -463,7 +460,7 @@
     CompLevel max_osr_level = (CompLevel)imh->highest_osr_comp_level();
     // At the very least compile the OSR version
     if (!CompileBroker::compilation_is_in_queue(imh, bci) && next_osr_level != level) {
-      compile(imh, bci, next_osr_level, THREAD);
+      compile(imh, bci, next_osr_level, thread);
     }
 
     // Use loop event as an opportunity to also check if there's been
@@ -502,14 +499,14 @@
           next_level = CompLevel_full_profile;
         }
         if (cur_level != next_level) {
-          compile(mh, InvocationEntryBci, next_level, THREAD);
+          compile(mh, InvocationEntryBci, next_level, thread);
         }
       }
     } else {
       cur_level = comp_level(imh());
       next_level = call_event(imh(), cur_level);
       if (!CompileBroker::compilation_is_in_queue(imh, bci) && next_level != cur_level) {
-        compile(imh, InvocationEntryBci, next_level, THREAD);
+        compile(imh, InvocationEntryBci, next_level, thread);
       }
     }
   }
--- a/hotspot/src/share/vm/runtime/advancedThresholdPolicy.hpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/runtime/advancedThresholdPolicy.hpp	Fri Feb 03 14:04:59 2012 -0500
@@ -197,7 +197,7 @@
   // determines whether we should do that.
   inline bool should_create_mdo(methodOop method, CompLevel cur_level);
   // Create MDO if necessary.
-  void create_mdo(methodHandle mh, TRAPS);
+  void create_mdo(methodHandle mh, JavaThread* thread);
   // Is method profiled enough?
   bool is_method_profiled(methodOop method);
 
@@ -208,12 +208,12 @@
   jlong start_time() const     { return _start_time; }
 
   // Submit a given method for compilation (and update the rate).
-  virtual void submit_compile(methodHandle mh, int bci, CompLevel level, TRAPS);
+  virtual void submit_compile(methodHandle mh, int bci, CompLevel level, JavaThread* thread);
   // event() from SimpleThresholdPolicy would call these.
   virtual void method_invocation_event(methodHandle method, methodHandle inlinee,
-                                       CompLevel level, nmethod* nm, TRAPS);
+                                       CompLevel level, nmethod* nm, JavaThread* thread);
   virtual void method_back_branch_event(methodHandle method, methodHandle inlinee,
-                                        int bci, CompLevel level, nmethod* nm, TRAPS);
+                                        int bci, CompLevel level, nmethod* nm, JavaThread* thread);
 public:
   AdvancedThresholdPolicy() : _start_time(0) { }
   // Select task is called by CompileBroker. We should return a task or NULL.
--- a/hotspot/src/share/vm/runtime/arguments.cpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/runtime/arguments.cpp	Fri Feb 03 14:04:59 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1040,6 +1040,16 @@
 }
 
 #ifndef KERNEL
+static void disable_adaptive_size_policy(const char* collector_name) {
+  if (UseAdaptiveSizePolicy) {
+    if (FLAG_IS_CMDLINE(UseAdaptiveSizePolicy)) {
+      warning("disabling UseAdaptiveSizePolicy; it is incompatible with %s.",
+              collector_name);
+    }
+    FLAG_SET_DEFAULT(UseAdaptiveSizePolicy, false);
+  }
+}
+
 // If the user has chosen ParallelGCThreads > 0, we set UseParNewGC
 // if it's not explictly set or unset. If the user has chosen
 // UseParNewGC and not explicitly set ParallelGCThreads we
@@ -1049,11 +1059,8 @@
          "control point invariant");
   assert(UseParNewGC, "Error");
 
-  // Turn off AdaptiveSizePolicy by default for parnew until it is
-  // complete.
-  if (FLAG_IS_DEFAULT(UseAdaptiveSizePolicy)) {
-    FLAG_SET_DEFAULT(UseAdaptiveSizePolicy, false);
-  }
+  // Turn off AdaptiveSizePolicy for parnew until it is complete.
+  disable_adaptive_size_policy("UseParNewGC");
 
   if (ParallelGCThreads == 0) {
     FLAG_SET_DEFAULT(ParallelGCThreads,
@@ -1110,11 +1117,8 @@
     FLAG_SET_ERGO(bool, UseParNewGC, true);
   }
 
-  // Turn off AdaptiveSizePolicy by default for cms until it is
-  // complete.
-  if (FLAG_IS_DEFAULT(UseAdaptiveSizePolicy)) {
-    FLAG_SET_DEFAULT(UseAdaptiveSizePolicy, false);
-  }
+  // Turn off AdaptiveSizePolicy for CMS until it is complete.
+  disable_adaptive_size_policy("UseConcMarkSweepGC");
 
   // In either case, adjust ParallelGCThreads and/or UseParNewGC
   // as needed.
--- a/hotspot/src/share/vm/runtime/compilationPolicy.cpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/runtime/compilationPolicy.cpp	Fri Feb 03 14:04:59 2012 -0500
@@ -306,29 +306,27 @@
   return (current >= initial + target);
 }
 
-nmethod* NonTieredCompPolicy::event(methodHandle method, methodHandle inlinee, int branch_bci, int bci, CompLevel comp_level, nmethod* nm, TRAPS) {
+nmethod* NonTieredCompPolicy::event(methodHandle method, methodHandle inlinee, int branch_bci,
+                                    int bci, CompLevel comp_level, nmethod* nm, JavaThread* thread) {
   assert(comp_level == CompLevel_none, "This should be only called from the interpreter");
   NOT_PRODUCT(trace_frequency_counter_overflow(method, branch_bci, bci));
-  if (JvmtiExport::can_post_interpreter_events()) {
-    assert(THREAD->is_Java_thread(), "Wrong type of thread");
-    if (((JavaThread*)THREAD)->is_interp_only_mode()) {
-      // If certain JVMTI events (e.g. frame pop event) are requested then the
-      // thread is forced to remain in interpreted code. This is
-      // implemented partly by a check in the run_compiled_code
-      // section of the interpreter whether we should skip running
-      // compiled code, and partly by skipping OSR compiles for
-      // interpreted-only threads.
-      if (bci != InvocationEntryBci) {
-        reset_counter_for_back_branch_event(method);
-        return NULL;
-      }
+  if (JvmtiExport::can_post_interpreter_events() && thread->is_interp_only_mode()) {
+    // If certain JVMTI events (e.g. frame pop event) are requested then the
+    // thread is forced to remain in interpreted code. This is
+    // implemented partly by a check in the run_compiled_code
+    // section of the interpreter whether we should skip running
+    // compiled code, and partly by skipping OSR compiles for
+    // interpreted-only threads.
+    if (bci != InvocationEntryBci) {
+      reset_counter_for_back_branch_event(method);
+      return NULL;
     }
   }
   if (bci == InvocationEntryBci) {
     // when code cache is full, compilation gets switched off, UseCompiler
     // is set to false
     if (!method->has_compiled_code() && UseCompiler) {
-      method_invocation_event(method, CHECK_NULL);
+      method_invocation_event(method, thread);
     } else {
       // Force counter overflow on method entry, even if no compilation
       // happened.  (The method_invocation_event call does this also.)
@@ -344,7 +342,7 @@
     NOT_PRODUCT(trace_osr_request(method, osr_nm, bci));
     // when code cache is full, we should not compile any more...
     if (osr_nm == NULL && UseCompiler) {
-      method_back_branch_event(method, bci, CHECK_NULL);
+      method_back_branch_event(method, bci, thread);
       osr_nm = method->lookup_osr_nmethod_for(bci, CompLevel_highest_tier, true);
     }
     if (osr_nm == NULL) {
@@ -395,7 +393,7 @@
 
 // SimpleCompPolicy - compile current method
 
-void SimpleCompPolicy::method_invocation_event( methodHandle m, TRAPS) {
+void SimpleCompPolicy::method_invocation_event(methodHandle m, JavaThread* thread) {
   int hot_count = m->invocation_count();
   reset_counter_for_invocation_event(m);
   const char* comment = "count";
@@ -405,18 +403,18 @@
     if (nm == NULL ) {
       const char* comment = "count";
       CompileBroker::compile_method(m, InvocationEntryBci, CompLevel_highest_tier,
-                                    m, hot_count, comment, CHECK);
+                                    m, hot_count, comment, thread);
     }
   }
 }
 
-void SimpleCompPolicy::method_back_branch_event(methodHandle m, int bci, TRAPS) {
+void SimpleCompPolicy::method_back_branch_event(methodHandle m, int bci, JavaThread* thread) {
   int hot_count = m->backedge_count();
   const char* comment = "backedge_count";
 
   if (is_compilation_enabled() && !m->is_not_osr_compilable() && can_be_compiled(m)) {
     CompileBroker::compile_method(m, bci, CompLevel_highest_tier,
-                                  m, hot_count, comment, CHECK);
+                                  m, hot_count, comment, thread);
     NOT_PRODUCT(trace_osr_completion(m->lookup_osr_nmethod_for(bci, CompLevel_highest_tier, true));)
   }
 }
@@ -427,14 +425,13 @@
 
 
 // Consider m for compilation
-void StackWalkCompPolicy::method_invocation_event(methodHandle m, TRAPS) {
+void StackWalkCompPolicy::method_invocation_event(methodHandle m, JavaThread* thread) {
   int hot_count = m->invocation_count();
   reset_counter_for_invocation_event(m);
   const char* comment = "count";
 
   if (is_compilation_enabled() && m->code() == NULL && can_be_compiled(m)) {
-    ResourceMark rm(THREAD);
-    JavaThread *thread = (JavaThread*)THREAD;
+    ResourceMark rm(thread);
     frame       fr     = thread->last_frame();
     assert(fr.is_interpreted_frame(), "must be interpreted");
     assert(fr.interpreter_frame_method() == m(), "bad method");
@@ -461,17 +458,17 @@
       assert(top != NULL, "findTopInlinableFrame returned null");
       if (TraceCompilationPolicy) top->print();
       CompileBroker::compile_method(top->top_method(), InvocationEntryBci, CompLevel_highest_tier,
-                                    m, hot_count, comment, CHECK);
+                                    m, hot_count, comment, thread);
     }
   }
 }
 
-void StackWalkCompPolicy::method_back_branch_event(methodHandle m, int bci, TRAPS) {
+void StackWalkCompPolicy::method_back_branch_event(methodHandle m, int bci, JavaThread* thread) {
   int hot_count = m->backedge_count();
   const char* comment = "backedge_count";
 
   if (is_compilation_enabled() && !m->is_not_osr_compilable() && can_be_compiled(m)) {
-    CompileBroker::compile_method(m, bci, CompLevel_highest_tier, m, hot_count, comment, CHECK);
+    CompileBroker::compile_method(m, bci, CompLevel_highest_tier, m, hot_count, comment, thread);
 
     NOT_PRODUCT(trace_osr_completion(m->lookup_osr_nmethod_for(bci, CompLevel_highest_tier, true));)
   }
--- a/hotspot/src/share/vm/runtime/compilationPolicy.hpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/runtime/compilationPolicy.hpp	Fri Feb 03 14:04:59 2012 -0500
@@ -64,7 +64,7 @@
   virtual int compiler_count(CompLevel comp_level) = 0;
   // main notification entry, return a pointer to an nmethod if the OSR is required,
   // returns NULL otherwise.
-  virtual nmethod* event(methodHandle method, methodHandle inlinee, int branch_bci, int bci, CompLevel comp_level, nmethod* nm, TRAPS) = 0;
+  virtual nmethod* event(methodHandle method, methodHandle inlinee, int branch_bci, int bci, CompLevel comp_level, nmethod* nm, JavaThread* thread) = 0;
   // safepoint() is called at the end of the safepoint
   virtual void do_safepoint_work() = 0;
   // reprofile request
@@ -105,15 +105,15 @@
   virtual bool is_mature(methodOop method);
   virtual void initialize();
   virtual CompileTask* select_task(CompileQueue* compile_queue);
-  virtual nmethod* event(methodHandle method, methodHandle inlinee, int branch_bci, int bci, CompLevel comp_level, nmethod* nm, TRAPS);
-  virtual void method_invocation_event(methodHandle m, TRAPS) = 0;
-  virtual void method_back_branch_event(methodHandle m, int bci, TRAPS) = 0;
+  virtual nmethod* event(methodHandle method, methodHandle inlinee, int branch_bci, int bci, CompLevel comp_level, nmethod* nm, JavaThread* thread);
+  virtual void method_invocation_event(methodHandle m, JavaThread* thread) = 0;
+  virtual void method_back_branch_event(methodHandle m, int bci, JavaThread* thread) = 0;
 };
 
 class SimpleCompPolicy : public NonTieredCompPolicy {
  public:
-  virtual void method_invocation_event(methodHandle m, TRAPS);
-  virtual void method_back_branch_event(methodHandle m, int bci, TRAPS);
+  virtual void method_invocation_event(methodHandle m, JavaThread* thread);
+  virtual void method_back_branch_event(methodHandle m, int bci, JavaThread* thread);
 };
 
 // StackWalkCompPolicy - existing C2 policy
@@ -121,8 +121,8 @@
 #ifdef COMPILER2
 class StackWalkCompPolicy : public NonTieredCompPolicy {
  public:
-  virtual void method_invocation_event(methodHandle m, TRAPS);
-  virtual void method_back_branch_event(methodHandle m, int bci, TRAPS);
+  virtual void method_invocation_event(methodHandle m, JavaThread* thread);
+  virtual void method_back_branch_event(methodHandle m, int bci, JavaThread* thread);
 
  private:
   RFrame* findTopInlinableFrame(GrowableArray<RFrame*>* stack);
--- a/hotspot/src/share/vm/runtime/frame.cpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/runtime/frame.cpp	Fri Feb 03 14:04:59 2012 -0500
@@ -1315,7 +1315,6 @@
 }
 #endif
 
-
 #ifdef ASSERT
 void frame::interpreter_frame_verify_monitor(BasicObjectLock* value) const {
   assert(is_interpreted_frame(), "Not an interpreted frame");
@@ -1331,24 +1330,35 @@
   guarantee((current - low_mark) % monitor_size  ==  0         , "Misaligned bottom of BasicObjectLock*");
   guarantee( current >= low_mark                               , "Current BasicObjectLock* below than low_mark");
 }
+#endif
 
+#ifndef PRODUCT
+void frame::describe(FrameValues& values, int frame_no) {
+  // boundaries: sp and the 'real' frame pointer
+  values.describe(-1, sp(), err_msg("sp for #%d", frame_no), 1);
+  intptr_t* frame_pointer = real_fp(); // Note: may differ from fp()
 
-void frame::describe(FrameValues& values, int frame_no) {
-  intptr_t* frame_pointer = real_fp();
+  // print frame info at the highest boundary
+  intptr_t* info_address = MAX2(sp(), frame_pointer);
+
+  if (info_address != frame_pointer) {
+    // print frame_pointer explicitly if not marked by the frame info
+    values.describe(-1, frame_pointer, err_msg("frame pointer for #%d", frame_no), 1);
+  }
+
   if (is_entry_frame() || is_compiled_frame() || is_interpreted_frame() || is_native_frame()) {
     // Label values common to most frames
     values.describe(-1, unextended_sp(), err_msg("unextended_sp for #%d", frame_no));
-    values.describe(-1, sp(), err_msg("sp for #%d", frame_no));
-    values.describe(-1, frame_pointer, err_msg("frame pointer for #%d", frame_no));
   }
+
   if (is_interpreted_frame()) {
     methodOop m = interpreter_frame_method();
     int bci = interpreter_frame_bci();
 
     // Label the method and current bci
-    values.describe(-1, MAX2(sp(), frame_pointer),
+    values.describe(-1, info_address,
                     FormatBuffer<1024>("#%d method %s @ %d", frame_no, m->name_and_sig_as_C_string(), bci), 2);
-    values.describe(-1, MAX2(sp(), frame_pointer),
+    values.describe(-1, info_address,
                     err_msg("- %d locals %d max stack", m->max_locals(), m->max_stack()), 1);
     if (m->max_locals() > 0) {
       intptr_t* l0 = interpreter_frame_local_at(0);
@@ -1380,21 +1390,36 @@
     }
   } else if (is_entry_frame()) {
     // For now just label the frame
-    values.describe(-1, MAX2(sp(), frame_pointer), err_msg("#%d entry frame", frame_no), 2);
+    values.describe(-1, info_address, err_msg("#%d entry frame", frame_no), 2);
   } else if (is_compiled_frame()) {
     // For now just label the frame
     nmethod* nm = cb()->as_nmethod_or_null();
-    values.describe(-1, MAX2(sp(), frame_pointer),
+    values.describe(-1, info_address,
                     FormatBuffer<1024>("#%d nmethod " INTPTR_FORMAT " for method %s%s", frame_no,
                                        nm, nm->method()->name_and_sig_as_C_string(),
-                                       is_deoptimized_frame() ? " (deoptimized" : ""), 2);
+                                       (_deopt_state == is_deoptimized) ?
+                                       " (deoptimized)" :
+                                       ((_deopt_state == unknown) ? " (state unknown)" : "")),
+                    2);
   } else if (is_native_frame()) {
     // For now just label the frame
     nmethod* nm = cb()->as_nmethod_or_null();
-    values.describe(-1, MAX2(sp(), frame_pointer),
+    values.describe(-1, info_address,
                     FormatBuffer<1024>("#%d nmethod " INTPTR_FORMAT " for native method %s", frame_no,
                                        nm, nm->method()->name_and_sig_as_C_string()), 2);
+  } else if (is_ricochet_frame()) {
+      values.describe(-1, info_address, err_msg("#%d ricochet frame", frame_no), 2);
+  } else {
+    // provide default info if not handled before
+    char *info = (char *) "special frame";
+    if ((_cb != NULL) &&
+        (_cb->name() != NULL)) {
+      info = (char *)_cb->name();
+    }
+    values.describe(-1, info_address, err_msg("#%d <%s>", frame_no, info), 2);
   }
+
+  // platform dependent additional data
   describe_pd(values, frame_no);
 }
 
@@ -1411,7 +1436,7 @@
 }
 
 
-#ifdef ASSERT
+#ifndef PRODUCT
 
 void FrameValues::describe(int owner, intptr_t* location, const char* description, int priority) {
   FrameValue fv;
@@ -1424,6 +1449,7 @@
 }
 
 
+#ifdef ASSERT
 void FrameValues::validate() {
   _values.sort(compare);
   bool error = false;
@@ -1449,7 +1475,7 @@
   }
   assert(!error, "invalid layout");
 }
-
+#endif // ASSERT
 
 void FrameValues::print(JavaThread* thread) {
   _values.sort(compare);
@@ -1498,4 +1524,4 @@
   }
 }
 
-#endif
+#endif // ndef PRODUCT
--- a/hotspot/src/share/vm/runtime/frame.hpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/runtime/frame.hpp	Fri Feb 03 14:04:59 2012 -0500
@@ -494,7 +494,7 @@
 
 };
 
-#ifdef ASSERT
+#ifndef PRODUCT
 // A simple class to describe a location on the stack
 class FrameValue VALUE_OBJ_CLASS_SPEC {
  public:
@@ -524,7 +524,9 @@
   // Used by frame functions to describe locations.
   void describe(int owner, intptr_t* location, const char* description, int priority = 0);
 
+#ifdef ASSERT
   void validate();
+#endif
   void print(JavaThread* thread);
 };
 
--- a/hotspot/src/share/vm/runtime/globals.hpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/runtime/globals.hpp	Fri Feb 03 14:04:59 2012 -0500
@@ -3589,7 +3589,7 @@
           "Threshold at which tier 3 compilation is invoked (invocation "   \
           "minimum must be satisfied.")                                     \
                                                                             \
-  product(intx, Tier3BackEdgeThreshold,  7000,                              \
+  product(intx, Tier3BackEdgeThreshold,  60000,                             \
           "Back edge threshold at which tier 3 OSR compilation is invoked") \
                                                                             \
   product(intx, Tier4InvocationThreshold, 5000,                             \
--- a/hotspot/src/share/vm/runtime/mutexLocker.cpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/runtime/mutexLocker.cpp	Fri Feb 03 14:04:59 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -126,6 +126,7 @@
 Mutex*   FreeList_lock                = NULL;
 Monitor* SecondaryFreeList_lock       = NULL;
 Mutex*   OldSets_lock                 = NULL;
+Monitor* RootRegionScan_lock          = NULL;
 Mutex*   MMUTracker_lock              = NULL;
 Mutex*   HotCardCache_lock            = NULL;
 
@@ -199,6 +200,7 @@
     def(FreeList_lock              , Mutex,   leaf     ,   true );
     def(SecondaryFreeList_lock     , Monitor, leaf     ,   true );
     def(OldSets_lock               , Mutex  , leaf     ,   true );
+    def(RootRegionScan_lock        , Monitor, leaf     ,   true );
     def(MMUTracker_lock            , Mutex  , leaf     ,   true );
     def(HotCardCache_lock          , Mutex  , special  ,   true );
     def(EvacFailureStack_lock      , Mutex  , nonleaf  ,   true );
--- a/hotspot/src/share/vm/runtime/mutexLocker.hpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/runtime/mutexLocker.hpp	Fri Feb 03 14:04:59 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -115,7 +115,7 @@
 
 #ifndef PRODUCT
 extern Mutex*   FullGCALot_lock;                 // a lock to make FullGCALot MT safe
-#endif
+#endif // PRODUCT
 extern Mutex*   Debug1_lock;                     // A bunch of pre-allocated locks that can be used for tracing
 extern Mutex*   Debug2_lock;                     // down synchronization related bugs!
 extern Mutex*   Debug3_lock;
@@ -129,6 +129,7 @@
 extern Mutex*   FreeList_lock;                   // protects the free region list during safepoints
 extern Monitor* SecondaryFreeList_lock;          // protects the secondary free region list
 extern Mutex*   OldSets_lock;                    // protects the old region sets
+extern Monitor* RootRegionScan_lock;             // used to notify that the CM threads have finished scanning the IM snapshot regions
 extern Mutex*   MMUTracker_lock;                 // protects the MMU
                                                  // tracker data structures
 extern Mutex*   HotCardCache_lock;               // protects the hot card cache
--- a/hotspot/src/share/vm/runtime/simpleThresholdPolicy.cpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/runtime/simpleThresholdPolicy.cpp	Fri Feb 03 14:04:59 2012 -0500
@@ -177,13 +177,11 @@
 }
 
 nmethod* SimpleThresholdPolicy::event(methodHandle method, methodHandle inlinee,
-                                      int branch_bci, int bci, CompLevel comp_level, nmethod* nm, TRAPS) {
+                                      int branch_bci, int bci, CompLevel comp_level, nmethod* nm, JavaThread* thread) {
   if (comp_level == CompLevel_none &&
-      JvmtiExport::can_post_interpreter_events()) {
-    assert(THREAD->is_Java_thread(), "Should be java thread");
-    if (((JavaThread*)THREAD)->is_interp_only_mode()) {
-      return NULL;
-    }
+      JvmtiExport::can_post_interpreter_events() &&
+      thread->is_interp_only_mode()) {
+    return NULL;
   }
   nmethod *osr_nm = NULL;
 
@@ -197,9 +195,9 @@
   }
 
   if (bci == InvocationEntryBci) {
-    method_invocation_event(method, inlinee, comp_level, nm, THREAD);
+    method_invocation_event(method, inlinee, comp_level, nm, thread);
   } else {
-    method_back_branch_event(method, inlinee, bci, comp_level, nm, THREAD);
+    method_back_branch_event(method, inlinee, bci, comp_level, nm, thread);
     // method == inlinee if the event originated in the main method
     int highest_level = inlinee->highest_osr_comp_level();
     if (highest_level > comp_level) {
@@ -210,7 +208,7 @@
 }
 
 // Check if the method can be compiled, change level if necessary
-void SimpleThresholdPolicy::compile(methodHandle mh, int bci, CompLevel level, TRAPS) {
+void SimpleThresholdPolicy::compile(methodHandle mh, int bci, CompLevel level, JavaThread* thread) {
   assert(level <= TieredStopAtLevel, "Invalid compilation level");
   if (level == CompLevel_none) {
     return;
@@ -221,7 +219,7 @@
   // pure C1.
   if (!can_be_compiled(mh, level)) {
     if (level == CompLevel_full_optimization && can_be_compiled(mh, CompLevel_simple)) {
-        compile(mh, bci, CompLevel_simple, THREAD);
+        compile(mh, bci, CompLevel_simple, thread);
     }
     return;
   }
@@ -232,14 +230,14 @@
     if (PrintTieredEvents) {
       print_event(COMPILE, mh, mh, bci, level);
     }
-    submit_compile(mh, bci, level, THREAD);
+    submit_compile(mh, bci, level, thread);
   }
 }
 
 // Tell the broker to compile the method
-void SimpleThresholdPolicy::submit_compile(methodHandle mh, int bci, CompLevel level, TRAPS) {
+void SimpleThresholdPolicy::submit_compile(methodHandle mh, int bci, CompLevel level, JavaThread* thread) {
   int hot_count = (bci == InvocationEntryBci) ? mh->invocation_count() : mh->backedge_count();
-  CompileBroker::compile_method(mh, bci, level, mh, hot_count, "tiered", THREAD);
+  CompileBroker::compile_method(mh, bci, level, mh, hot_count, "tiered", thread);
 }
 
 // Call and loop predicates determine whether a transition to a higher
@@ -366,11 +364,11 @@
 
 // Handle the invocation event.
 void SimpleThresholdPolicy::method_invocation_event(methodHandle mh, methodHandle imh,
-                                              CompLevel level, nmethod* nm, TRAPS) {
+                                              CompLevel level, nmethod* nm, JavaThread* thread) {
   if (is_compilation_enabled() && !CompileBroker::compilation_is_in_queue(mh, InvocationEntryBci)) {
     CompLevel next_level = call_event(mh(), level);
     if (next_level != level) {
-      compile(mh, InvocationEntryBci, next_level, THREAD);
+      compile(mh, InvocationEntryBci, next_level, thread);
     }
   }
 }
@@ -378,7 +376,7 @@
 // Handle the back branch event. Notice that we can compile the method
 // with a regular entry from here.
 void SimpleThresholdPolicy::method_back_branch_event(methodHandle mh, methodHandle imh,
-                                                     int bci, CompLevel level, nmethod* nm, TRAPS) {
+                                                     int bci, CompLevel level, nmethod* nm, JavaThread* thread) {
   // If the method is already compiling, quickly bail out.
   if (is_compilation_enabled() && !CompileBroker::compilation_is_in_queue(mh, bci)) {
     // Use loop event as an opportinity to also check there's been
@@ -391,13 +389,13 @@
                       next_osr_level < CompLevel_full_optimization ? next_osr_level : cur_level);
     bool is_compiling = false;
     if (next_level != cur_level) {
-      compile(mh, InvocationEntryBci, next_level, THREAD);
+      compile(mh, InvocationEntryBci, next_level, thread);
       is_compiling = true;
     }
 
     // Do the OSR version
     if (!is_compiling && next_osr_level != level) {
-      compile(mh, bci, next_osr_level, THREAD);
+      compile(mh, bci, next_osr_level, thread);
     }
   }
 }
--- a/hotspot/src/share/vm/runtime/simpleThresholdPolicy.hpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/runtime/simpleThresholdPolicy.hpp	Fri Feb 03 14:04:59 2012 -0500
@@ -67,9 +67,9 @@
   // Print policy-specific information if necessary
   virtual void print_specific(EventType type, methodHandle mh, methodHandle imh, int bci, CompLevel level) { }
   // Check if the method can be compiled, change level if necessary
-  void compile(methodHandle mh, int bci, CompLevel level, TRAPS);
+  void compile(methodHandle mh, int bci, CompLevel level, JavaThread* thread);
   // Submit a given method for compilation
-  virtual void submit_compile(methodHandle mh, int bci, CompLevel level, TRAPS);
+  virtual void submit_compile(methodHandle mh, int bci, CompLevel level, JavaThread* thread);
   // Simple methods are as good being compiled with C1 as C2.
   // This function tells if it's such a function.
   inline bool is_trivial(methodOop method);
@@ -88,9 +88,9 @@
     return CompLevel_none;
   }
   virtual void method_invocation_event(methodHandle method, methodHandle inlinee,
-                                       CompLevel level, nmethod* nm, TRAPS);
+                                       CompLevel level, nmethod* nm, JavaThread* thread);
   virtual void method_back_branch_event(methodHandle method, methodHandle inlinee,
-                                        int bci, CompLevel level, nmethod* nm, TRAPS);
+                                        int bci, CompLevel level, nmethod* nm, JavaThread* thread);
 public:
   SimpleThresholdPolicy() : _c1_count(0), _c2_count(0) { }
   virtual int compiler_count(CompLevel comp_level) {
@@ -104,7 +104,7 @@
   virtual void disable_compilation(methodOop method) { }
   virtual void reprofile(ScopeDesc* trap_scope, bool is_osr);
   virtual nmethod* event(methodHandle method, methodHandle inlinee,
-                         int branch_bci, int bci, CompLevel comp_level, nmethod* nm, TRAPS);
+                         int branch_bci, int bci, CompLevel comp_level, nmethod* nm, JavaThread* thread);
   // Select task is called by CompileBroker. We should return a task or NULL.
   virtual CompileTask* select_task(CompileQueue* compile_queue);
   // Tell the runtime if we think a given method is adequately profiled.
--- a/hotspot/src/share/vm/services/g1MemoryPool.cpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/services/g1MemoryPool.cpp	Fri Feb 03 14:04:59 2012 -0500
@@ -78,7 +78,7 @@
   G1MemoryPoolSuper(g1h,
                     "G1 Old Gen",
                     g1h->g1mm()->old_space_committed(), /* init_size */
-                    _undefined_max,
+                    g1h->g1mm()->old_gen_max(),
                     true /* support_usage_threshold */) { }
 
 MemoryUsage G1OldGenPool::get_memory_usage() {
--- a/hotspot/src/share/vm/services/g1MemoryPool.hpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/services/g1MemoryPool.hpp	Fri Feb 03 14:04:59 2012 -0500
@@ -101,7 +101,7 @@
     return _g1mm->old_space_used();
   }
   size_t max_size() const {
-    return _undefined_max;
+    return _g1mm->old_gen_max();
   }
   MemoryUsage get_memory_usage();
 };
--- a/hotspot/src/share/vm/utilities/bitMap.inline.hpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/utilities/bitMap.inline.hpp	Fri Feb 03 14:04:59 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -178,8 +178,30 @@
     for (; !(res & 1); res_offset++) {
       res = res >> 1;
     }
-    assert(res_offset >= l_offset &&
-           res_offset < r_offset, "just checking");
+
+#ifdef ASSERT
+    // In the following assert, if r_offset is not bitamp word aligned,
+    // checking that res_offset is strictly less than r_offset is too
+    // strong and will trip the assert.
+    //
+    // Consider the case where l_offset is bit 15 and r_offset is bit 17
+    // of the same map word, and where bits [15:16:17:18] == [00:00:00:01].
+    // All the bits in the range [l_offset:r_offset) are 0.
+    // The loop that calculates res_offset, above, would yield the offset
+    // of bit 18 because it's in the same map word as l_offset and there
+    // is a set bit in that map word above l_offset (i.e. res != NoBits).
+    //
+    // In this case, however, we can assert is that res_offset is strictly
+    // less than size() since we know that there is at least one set bit
+    // at an offset above, but in the same map word as, r_offset.
+    // Otherwise, if r_offset is word aligned then it will not be in the
+    // same map word as l_offset (unless it equals l_offset). So either
+    // there won't be a set bit between l_offset and the end of it's map
+    // word (i.e. res == NoBits), or res_offset will be less than r_offset.
+
+    idx_t limit = is_word_aligned(r_offset) ? r_offset : size();
+    assert(res_offset >= l_offset && res_offset < limit, "just checking");
+#endif // ASSERT
     return MIN2(res_offset, r_offset);
   }
   // skip over all word length 0-bit runs
--- a/hotspot/src/share/vm/utilities/exceptions.hpp	Wed Feb 01 15:01:08 2012 -0500
+++ b/hotspot/src/share/vm/utilities/exceptions.hpp	Fri Feb 03 14:04:59 2012 -0500
@@ -189,6 +189,13 @@
 #define CHECK_NULL                               CHECK_(NULL)
 #define CHECK_false                              CHECK_(false)
 
+#define CHECK_AND_CLEAR                         THREAD); if (HAS_PENDING_EXCEPTION) { CLEAR_PENDING_EXCEPTION; return;        } (0
+#define CHECK_AND_CLEAR_(result)                THREAD); if (HAS_PENDING_EXCEPTION) { CLEAR_PENDING_EXCEPTION; return result; } (0
+#define CHECK_AND_CLEAR_0                       CHECK_AND_CLEAR_(0)
+#define CHECK_AND_CLEAR_NH                      CHECK_AND_CLEAR_(Handle())
+#define CHECK_AND_CLEAR_NULL                    CHECK_AND_CLEAR_(NULL)
+#define CHECK_AND_CLEAR_false                   CHECK_AND_CLEAR_(false)
+
 // The THROW... macros should be used to throw an exception. They require a THREAD variable to be
 // visible within the scope containing the THROW. Usually this is achieved by declaring the function
 // with a TRAPS argument.
@@ -258,7 +265,6 @@
     ShouldNotReachHere();                  \
   } (0
 
-
 // ExceptionMark is a stack-allocated helper class for local exception handling.
 // It is used with the EXCEPTION_MARK macro.
 
--- a/jaxp/.hgtags	Wed Feb 01 15:01:08 2012 -0500
+++ b/jaxp/.hgtags	Fri Feb 03 14:04:59 2012 -0500
@@ -144,3 +144,4 @@
 f052abb8f37444ba77858913887d0d92795dd6b8 jdk8-b20
 d41eeadf5c1344b88c5051a997aec9e1ad7ce1db jdk8-b21
 cf9d6ec44f891236ad18451021d6dcd57dc82f7b jdk8-b22
+95102fd334183d15dc98a95dd0d749527b6c7300 jdk8-b23
--- a/jaxws/.hgtags	Wed Feb 01 15:01:08 2012 -0500
+++ b/jaxws/.hgtags	Fri Feb 03 14:04:59 2012 -0500
@@ -144,3 +144,4 @@
 2b2818e3386f4510c390f6aea90d77e1c6a5bf9e jdk8-b20
 c266cab0e3fff05f2048c23046c14d60f7102175 jdk8-b21
 8d3df89b0f2d3c603b2edb0f5e24af1245397cc6 jdk8-b22
+25ce7a0004874273f6aeda14e7c3538cba34bdf1 jdk8-b23
--- a/jdk/.hgtags	Wed Feb 01 15:01:08 2012 -0500
+++ b/jdk/.hgtags	Fri Feb 03 14:04:59 2012 -0500
@@ -144,3 +144,4 @@
 39e938cd1b82ec3aab0a9aa66fd8a0457cd0c9c2 jdk8-b20
 664fa4fb0ee411ef048903c479f8b962fcdb2f4b jdk8-b21
 dda27c73d8db4a9c7a23872b6f0c5106edcb2021 jdk8-b22
+54202e0148ec7d4570cab5bc9b00d216a7677569 jdk8-b23
--- a/langtools/.hgtags	Wed Feb 01 15:01:08 2012 -0500
+++ b/langtools/.hgtags	Fri Feb 03 14:04:59 2012 -0500
@@ -144,3 +144,4 @@
 ffd294128a48cbb90ce8f0569f82b61f1f164a18 jdk8-b20
 bcb21abf1c4177baf4574f99709513dcd4474727 jdk8-b21
 390a7828ae18324030c0546b6452d51093ffa451 jdk8-b22
+601ffcc6551d5414ef871be306c3a26396cf16a7 jdk8-b23